jaulib v1.3.0
Jau Support Library (C++, Java, ..)
Uri.java
Go to the documentation of this file.
1/**
2 * Author: Sven Gothel <sgothel@jausoft.com>
3 * Copyright (c) 2021 Gothel Software e.K.
4 * Copyright (c) 2012 Gothel Software e.K.
5 * Copyright (c) 2014 JogAmp Community.
6 * Copyright (c) 2006, 2010 The Apache Software Foundation.
7 *
8 * This code is derived from or inspired by the Apache Harmony project's {@code class java.net.URI.Helper},
9 * and has been heavily modified for GlueGen/JogAmp.
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining
12 * a copy of this software and associated documentation files (the
13 * "Software"), to deal in the Software without restriction, including
14 * without limitation the rights to use, copy, modify, merge, publish,
15 * distribute, sublicense, and/or sell copies of the Software, and to
16 * permit persons to whom the Software is furnished to do so, subject to
17 * the following conditions:
18 *
19 * The above copyright notice and this permission notice shall be
20 * included in all copies or substantial portions of the Software.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 */
30package org.jau.net;
31
32import java.io.File;
33import java.io.UnsupportedEncodingException;
34import java.net.MalformedURLException;
35import java.net.URISyntaxException;
36import java.util.StringTokenizer;
37import java.util.regex.Pattern;
38
39/**
40 * This class implements an immutable Uri as defined by <a href="https://tools.ietf.org/html/rfc2396">RFC 2396</a>.
41 * <p>
42 * Character encoding is employed as defined by <a href="https://tools.ietf.org/html/rfc3986">RFC 3986</a>,
43 * see <a href="https://tools.ietf.org/html/rfc3986#section-2.1">RFC 3986 section 2.1</a>,
44 * while multibyte unicode characters are preserved in encoded parts.
45 * </p>
46 *
47 * <pre>
48 1 [scheme:]scheme-specific-part[#fragment]
49 2 [scheme:][//authority]path[?query][#fragment]
50 3 [scheme:][//[user-info@]host[:port]]path[?query][#fragment]
51
52 scheme-specific-part: [//authority]path[?query]
53 authority: [user-info@]host[:port]
54 * </pre>
55 * <p>
56 * <a href="https://tools.ietf.org/html/rfc3986#section-2.2">RFC 3986 section 2.2</a> <i>Reserved Characters</i> (January 2005)
57 * <table border="1">
58 <tr>
59 <td><code>!</code></td>
60 <td><code>*</code></td>
61 <td><code>'</code></td>
62 <td><code>(</code></td>
63 <td><code>)</code></td>
64 <td><code>;</code></td>
65 <td><code>:</code></td>
66 <td><code>@</code></td>
67 <td><code>&amp;</code></td>
68 <td><code>=</code></td>
69 <td><code>+</code></td>
70 <td><code>$</code></td>
71 <td><code>,</code></td>
72 <td><code>/</code></td>
73 <td><code>?</code></td>
74 <td><code>#</code></td>
75 <td><code>[</code></td>
76 <td><code>]</code></td>
77 </tr>
78 * </table>
79 * </p>
80 * <p>
81 * <a href="https://tools.ietf.org/html/rfc3986#section-2.3">RFC 3986 section 2.3</a> <i>Unreserved Characters</i> (January 2005)
82 * <table border="1">
83 <tr>
84 <td><code>A</code></td>
85 <td><code>B</code></td>
86 <td><code>C</code></td>
87 <td><code>D</code></td>
88 <td><code>E</code></td>
89 <td><code>F</code></td>
90 <td><code>G</code></td>
91 <td><code>H</code></td>
92 <td><code>I</code></td>
93 <td><code>J</code></td>
94 <td><code>K</code></td>
95 <td><code>L</code></td>
96 <td><code>M</code></td>
97 <td><code>N</code></td>
98 <td><code>O</code></td>
99 <td><code>P</code></td>
100 <td><code>Q</code></td>
101 <td><code>R</code></td>
102 <td><code>S</code></td>
103 <td><code>T</code></td>
104 <td><code>U</code></td>
105 <td><code>V</code></td>
106 <td><code>W</code></td>
107 <td><code>X</code></td>
108 <td><code>Y</code></td>
109 <td><code>Z</code></td>
110 </tr>
111 <tr>
112 <td><code>a</code></td>
113 <td><code>b</code></td>
114 <td><code>c</code></td>
115 <td><code>d</code></td>
116 <td><code>e</code></td>
117 <td><code>f</code></td>
118 <td><code>g</code></td>
119 <td><code>h</code></td>
120 <td><code>i</code></td>
121 <td><code>j</code></td>
122 <td><code>k</code></td>
123 <td><code>l</code></td>
124 <td><code>m</code></td>
125 <td><code>n</code></td>
126 <td><code>o</code></td>
127 <td><code>p</code></td>
128 <td><code>q</code></td>
129 <td><code>r</code></td>
130 <td><code>s</code></td>
131 <td><code>t</code></td>
132 <td><code>u</code></td>
133 <td><code>v</code></td>
134 <td><code>w</code></td>
135 <td><code>x</code></td>
136 <td><code>y</code></td>
137 <td><code>z</code></td>
138 </tr>
139 <tr>
140 <td><code>0</code></td>
141 <td><code>1</code></td>
142 <td><code>2</code></td>
143 <td><code>3</code></td>
144 <td><code>4</code></td>
145 <td><code>5</code></td>
146 <td><code>6</code></td>
147 <td><code>7</code></td>
148 <td><code>8</code></td>
149 <td><code>9</code></td>
150 <td><code>-</code></td>
151 <td><code>_</code></td>
152 <td><code>.</code></td>
153 <td><code>~</code></td>
154 </tr>
155 * </table>
156 * </p>
157 * <p>
158 * Other characters in a Uri must be percent encoded.
159 * </p>
160 * @since 0.3.0
161 */
162public class Uri {
163 private static final boolean DEBUG = false;
164 private static final boolean DEBUG_SHOWFIX = false;
165 /**
166 private static final boolean DEBUG;
167 private static final boolean DEBUG_SHOWFIX;
168
169 static {
170 Debug.initSingleton();
171 DEBUG = IOUtil.DEBUG || Debug.debug("Uri");
172 DEBUG_SHOWFIX = PropertyAccess.isPropertyDefined("jau.debug.Uri.ShowFix", true);
173 } */
174
175 /**
176 * Usually used to fix a path from a previously contained and opaque Uri,
177 * i.e. {@link #getContainedUri()}.
178 * <p>
179 * Such an opaque Uri w/ erroneous encoding may have been injected via
180 * {@link #valueOf(URI)} and {@link #valueOf(URL)} where the given URL or URI was opaque!
181 * </p>
182 * <p>
183 * This remedies issues when dealing w/ java URI/URL opaque sources,
184 * which do not comply with the spec, i.e. contain un-encoded chars, e.g. ':', '$', ..
185 * </p>
186 */
187 private static final int PARSE_HINT_FIX_PATH = 1 << 0;
188
189 private static final String DIGITS = "0123456789ABCDEF";
190
191 private static final String ENCODING = "UTF8";
192 private static final String MSG_ENCODING_NA = "Charset UTF8 not available";
193 private static final Pattern patternSingleFS = Pattern.compile("/{1}");
194
195 /**
196 * RFC 3986 section 2.3 Unreserved Characters (January 2005)
197 * <p>
198 * {@value} + {@code alphanum}
199 * </p>
200 */
201 public static final String UNRESERVED = "_-.~";
202 // Harmony: _ - ! . ~ ' ( ) *
203
204 private static final String punct = ",;:$&+=";
205 // Harmony: , ; : $ & + =
206
207 /**
208 * RFC 3986 section 2.2 Reserved Characters (January 2005)
209 * <p>
210 * {@value}
211 * </p>
212 */
213 public static final String RESERVED = punct + "!*\'()@/?#[]";
214 // Harmony: , ; : $ & + = ? / [ ] @
215
216 public static final String RESERVED_2 = punct + "!*\'()@/?[]";
217 // Harmony: , ; : $ & + = ? / [ ] @
218
219 // Bug 908, issues w/ windows file path char: $ ^ ~ # [ ]
220 // Windows invalid File characters: * ? " < > |
221
222 /**
223 * Valid charset for RFC 2396 {@code authority}'s {@code user-info},
224 * additional to legal {@code alphanum} characters.
225 * <p>
226 * {@value} + {@code alphanum}
227 * </p>
228 */
229 public static final String USERINFO_LEGAL = UNRESERVED + punct;
230 // Harmony: someLegal = unreserved + punct -> _ - ! . ~ ' ( ) * , ; : $ & + =
231
232 /**
233 * Valid charset for RFC 2396 {@code authority},
234 * additional to legal {@code alphanum} characters.
235 * <p>
236 * {@value} + {@code alphanum}
237 * </p>
238 */
239 public static final String AUTHORITY_LEGAL = "@[]" + USERINFO_LEGAL;
240
241 /**
242 * Valid charset for RFC 2396 {@code path},
243 * additional to legal {@code alphanum} characters.
244 * <p>
245 * {@value} + {@code alphanum}
246 * </p>
247 */
248 public static final String PATH_LEGAL = "/!" + UNRESERVED; // no RESERVED chars but '!', to allow JAR Uris;
249 // Harmony: "/@" + unreserved + punct -> / @ _ - ! . ~ \ ' ( ) * , ; : $ & + =
250
251 /**
252 * Valid charset for RFC 2396 {@code query},
253 * additional to legal {@code alphanum} characters.
254 * <p>
255 * {@value} + {@code alphanum}
256 * </p>
257 */
258 public static final String QUERY_LEGAL = UNRESERVED + RESERVED_2 + "\\\"";
259 // Harmony: unreserved + reserved + "\\\""
260
261 /**
262 * Valid charset for RFC 2396 {@code scheme-specific-part},
263 * additional to legal {@code alphanum} characters.
264 * <p>
265 * {@value} + {@code alphanum}
266 * </p>
267 */
268 public static final String SSP_LEGAL = QUERY_LEGAL;
269 // Harmony: unreserved + reserved
270
271 /**
272 * Valid charset for RFC 2396 {@code fragment},
273 * additional to legal {@code alphanum} characters.
274 * <p>
275 * {@value} + {@code alphanum}
276 * </p>
277 */
278 public static final String FRAG_LEGAL = UNRESERVED + RESERVED;
279 // Harmony: unreserved + reserved
280
281 /** {@value} */
282 public static final char SCHEME_SEPARATOR = ':';
283 /** {@value} */
284 public static final char QUERY_SEPARATOR = '?';
285 /** {@value} */
286 public static final char FRAGMENT_SEPARATOR = '#';
287 /** {@value} */
288 public static final String FILE_SCHEME = "file";
289 /** {@value} */
290 public static final String HTTP_SCHEME = "http";
291 /** {@value} */
292 public static final String HTTPS_SCHEME = "https";
293 /** {@value} */
294 public static final String JAR_SCHEME = "jar";
295 /** A JAR sub-protocol is separated from the JAR entry by this separator {@value}. Even if no class is specified, '!/' must follow. */
296 public static final char JAR_SCHEME_SEPARATOR = '!';
297
298 /**
299 * Immutable RFC3986 encoded string.
300 */
301 public static class Encoded implements Comparable<Encoded>, CharSequence {
302 private final String s;
303
304 /**
305 * Casts the given encoded String by creating a new Encoded instance.
306 * <p>
307 * No encoding will be performed, use with care.
308 * </p>
309 */
310 public static Encoded cast(final String encoded) {
311 return new Encoded(encoded);
312 }
313
314 Encoded(final String encodedString) {
315 this.s = encodedString;
316 }
317
318 /**
319 * Encodes all characters into their hexadecimal value prepended by '%', except:
320 * <ol>
321 * <li>letters ('a'..'z', 'A'..'Z')</li>
322 * <li>numbers ('0'..'9')</li>
323 * <li>characters in the legal-set parameter</li>
324 * <li> others (unicode characters that are not in
325 * US-ASCII set, and are not ISO Control or are not ISO Space characters)</li>
326 * </ol>
327 * <p>
328 * Uses {@link Uri#encode(String, String)} for implementation..
329 * </p>
330 *
331 * @param vanilla the string to be encoded
332 * @param legal extended character set, allowed to be preserved in the vanilla string
333 */
334 public Encoded(final String vanilla, final String legal) {
335 this.s = encode(vanilla, legal);
336 }
337
338 public boolean isASCII() { return false; }
339
340 /** Returns the encoded String */
341 public final String get() { return s; }
342
343 /**
344 * Decodes the string argument which is assumed to be encoded in the {@code
345 * x-www-form-urlencoded} MIME content type using the UTF-8 encoding scheme.
346 * <p>
347 *'%' and two following hex digit characters are converted to the
348 * equivalent byte value. All other characters are passed through
349 * unmodified.
350 * </p>
351 * <p>
352 * e.g. "A%20B%20C %24%25" -> "A B C $%"
353 * </p>
354 * <p>
355 * Uses {@link Uri#decode(String)} for implementation..
356 * </p>
357 */
358 public final String decode() { return Uri.decode(s); }
359
360 //
361 // Basic Object / Identity
362 //
363
364 /**
365 * {@inheritDoc}
366 * <p>
367 * Returns the encoded String, same as {@link #get()}.
368 * </p>
369 */
370 @Override
371 public final String toString() { return s; }
372
373 @Override
374 public final int hashCode() { return s.hashCode(); }
375
376 /**
377 * {@inheritDoc}
378 *
379 * @param o The comparison argument, either a {@link Encoded} or a {@link String}
380 *
381 * @return {@code true} if the given object is equivalent to this instance,
382 * otherwise {@code false}.
383 *
384 * @see #compareTo(Encoded)
385 * @see #equalsIgnoreCase(Encoded)
386 */
387 @Override
388 public final boolean equals(final Object o) {
389 if (this == o) {
390 return true;
391 }
392 if (o instanceof Encoded) {
393 return s.equals(((Encoded)o).s);
394 }
395 return s.equals(o);
396 }
397
398 //
399 // CharSequence
400 //
401
402 @Override
403 public final int length() { return s.length(); }
404
405 @Override
406 public final char charAt(final int index) { return s.charAt(index); }
407
408 @Override
409 public final CharSequence subSequence(final int start, final int end) { return s.subSequence(start, end); }
410
411 @Override
412 public final int compareTo(final Encoded o) { return s.compareTo(o.s); }
413
414 //
415 // String derived ..
416 //
417 /** See {@link String#concat(String)}. */
418 public Encoded concat(final Encoded encoded) { return new Encoded(s.concat(encoded.s)); }
419
420 /** See {@link String#substring(int)}. */
421 public final Encoded substring(final int start) { return new Encoded(s.substring(start)); }
422 /** See {@link String#substring(int, int)}. */
423 public final Encoded substring(final int start, final int end) { return new Encoded(s.substring(start, end)); }
424
425 /** See {@link String#indexOf(int)}. */
426 public final int indexOf(final int ch) { return s.indexOf(ch); }
427 /** See {@link String#indexOf(int, int)}. */
428 public final int indexOf(final int ch, final int fromIndex) { return s.indexOf(ch, fromIndex); }
429 /** See {@link String#indexOf(String)}. */
430 public final int indexOf(final String str) { return s.indexOf(str); }
431 /** See {@link String#indexOf(String, int)}. */
432 public final int indexOf(final String str, final int fromIndex) { return s.indexOf(str, fromIndex); }
433
434 /** See {@link String#lastIndexOf(int)}. */
435 public final int lastIndexOf(final int ch) { return s.lastIndexOf(ch); }
436 /** See {@link String#lastIndexOf(int, int)}. */
437 public int lastIndexOf(final int ch, final int fromIndex) { return s.lastIndexOf(ch, fromIndex); }
438 /** See {@link String#lastIndexOf(String)}. */
439 public int lastIndexOf(final String str) { return s.lastIndexOf(str); }
440 /** See {@link String#lastIndexOf(String, int)}. */
441 public int lastIndexOf(final String str, final int fromIndex) { return s.lastIndexOf(str, fromIndex); }
442
443 /** See {@link String#startsWith(String)} */
444 public boolean startsWith(final String prefix) { return s.startsWith(prefix); }
445 /** See {@link String#startsWith(String, int)} */
446 public boolean startsWith(final String prefix, final int toffset) { return s.startsWith(prefix, toffset); }
447 /** See {@link String#endsWith(String)} */
448 public boolean endsWith(final String suffix) { return s.endsWith(suffix); }
449
450 /** See {@link String#equalsIgnoreCase(String)}. */
451 public final boolean equalsIgnoreCase(final Encoded anotherEncoded) { return s.equalsIgnoreCase(anotherEncoded.s); }
452 }
453
454 public static class ASCIIEncoded extends Encoded {
455 /**
456 * Casts the given encoded String by creating a new ASCIIEncoded instance.
457 * <p>
458 * No encoding will be performed, use with care.
459 * </p>
460 */
461 public static ASCIIEncoded cast(final String encoded) {
462 return new ASCIIEncoded(encoded, null);
463 }
464 private ASCIIEncoded(final String encoded, final Object unused) {
465 super(encoded);
466 }
467
468 /**
469 * Other characters, which are Unicode chars that are not US-ASCII, and are
470 * not ISO Control or are not ISO Space chars are not preserved
471 * and encoded into their hexidecimal value prepended by '%'.
472 * <p>
473 * For example: Euro currency symbol -> "%E2%82%AC".
474 * </p>
475 * <p>
476 * Uses {@link Uri#encodeToASCIIString(String)} for implementation.
477 * </p>
478 * @param unicode unencoded input
479 */
480 public ASCIIEncoded(final String unicode) {
481 super(encodeToASCIIString(unicode));
482 }
483 @Override
484 public boolean isASCII() { return true; }
485 }
486
487 private static void encodeChar2UTF8(final StringBuilder buf, final char ch) {
488 final byte[] bytes;
489 try {
490 bytes = new String(new char[] { ch }).getBytes(ENCODING);
491 } catch (final UnsupportedEncodingException e) {
492 throw new RuntimeException(MSG_ENCODING_NA, e);
493 }
494 // FIXME: UTF-8 produces more than one byte ? Optimization might be possible.
495 for (int j = 0; j < bytes.length; j++) {
496 final byte b = bytes[j];
497 buf.append('%');
498 buf.append(DIGITS.charAt( ( b & 0xf0 ) >> 4 ));
499 buf.append(DIGITS.charAt( b & 0xf ));
500 }
501 }
502
503 /**
504 * All characters are encoded into their hexadecimal value prepended by '%', except:
505 * <ol>
506 * <li>letters ('a'..'z', 'A'..'Z')</li>
507 * <li>numbers ('0'..'9')</li>
508 * <li>characters in the legal-set parameter</li>
509 * <li> others (unicode characters that are not in
510 * US-ASCII set, and are not ISO Control or are not ISO Space characters)</li>
511 * </ol>
512 * <p>
513 * Use {@link #encodeToASCIIString(String)} for US-ASCII encoding.
514 * </p>
515 * <p>
516 * Consider using {@link Encoded#Encoded(String, String)} in APIs
517 * to distinguish encoded from unencoded data by type.
518 * </p>
519 *
520 * @param vanilla the string to be encoded
521 * @param legal extended character set, allowed to be preserved in the vanilla string
522 * @return java.lang.String the converted string
523 */
524 public static String encode(final String vanilla, final String legal) {
525 if( null == vanilla ) {
526 return null;
527 }
528 final StringBuilder buf = new StringBuilder();
529 for (int i = 0; i < vanilla.length(); i++) {
530 final char ch = vanilla.charAt(i);
531 if ( (ch >= 'a' && ch <= 'z') ||
532 (ch >= 'A' && ch <= 'Z') ||
533 (ch >= '0' && ch <= '9') ||
534 legal.indexOf(ch) > -1 ||
535 ( ch > 127 && !Character.isSpaceChar(ch) && !Character.isISOControl(ch) )
536 ) {
537 buf.append(ch);
538 } else {
539 encodeChar2UTF8(buf, ch);
540 }
541 }
542 return buf.toString();
543 }
544
545 /**
546 * Other characters, which are Unicode chars that are not US-ASCII, and are
547 * not ISO Control or are not ISO Space chars are not preserved
548 * and encoded into their hexidecimal value prepended by '%'.
549 * <p>
550 * For example: Euro currency symbol -> "%E2%82%AC".
551 * </p>
552 * <p>
553 * Consider using {@link ASCIIEncoded#ASCIIEncoded(String)} in APIs
554 * to distinguish encoded from unencoded data by type.
555 * </p>
556 * @param unicode string to be converted
557 * @return java.lang.String the converted string
558 */
559 public static String encodeToASCIIString(final String unicode) {
560 final StringBuilder buf = new StringBuilder();
561 for (int i = 0; i < unicode.length(); i++) {
562 final char ch = unicode.charAt(i);
563 if (ch <= 127) {
564 buf.append(ch);
565 } else {
566 encodeChar2UTF8(buf, ch);
567 }
568 }
569 return buf.toString();
570 }
571
572 /**
573 * Safe {@link Encoded#decode()} call on optional {@code encoded} instance.
574 * @param encoded {@link Encoded} instance to be decoded, may be {@code null}.
575 * @return the {@link Encoded#decode() decoded} String or {@code null} if {@code encoded} was {@code null}.
576 */
577 public static String decode(final Encoded encoded) {
578 return null != encoded ? encoded.decode() : null;
579 }
580
581 /**
582 * Decodes the string argument which is assumed to be encoded in the {@code
583 * x-www-form-urlencoded} MIME content type using the UTF-8 encoding scheme.
584 * <p>
585 *'%' and two following hex digit characters are converted to the
586 * equivalent byte value. All other characters are passed through
587 * unmodified.
588 * </p>
589 * <p>
590 * e.g. "A%20B%20C %24%25" -> "A B C $%"
591 * </p>
592 *
593 * @param encoded The encoded string.
594 * @return java.lang.String The decoded version.
595 */
596 public static String decode(final String encoded) {
597 if( null == encoded ) {
598 return null;
599 }
600 final StringBuilder result = new StringBuilder();
601 final byte[] buf = new byte[32];
602 int bufI = 0;
603 for (int i = 0; i < encoded.length();) {
604 final char c = encoded.charAt(i);
605 if (c == '%') {
606 bufI = 0;
607 do {
608 if (i + 2 >= encoded.length()) {
609 throw new IllegalArgumentException("missing '%' hex-digits at index "+i);
610 }
611 final int d1 = Character.digit(encoded.charAt(i + 1), 16);
612 final int d2 = Character.digit(encoded.charAt(i + 2), 16);
613 if (d1 == -1 || d2 == -1) {
614 throw new IllegalArgumentException("invalid hex-digits at index "+i+": "+encoded.substring(i, i + 3));
615 }
616 buf[bufI++] = (byte) ((d1 << 4) + d2);
617 if( 32 == bufI ) {
618 appendUTF8(result, buf, bufI);
619 bufI = 0;
620 }
621 i += 3;
622 } while (i < encoded.length() && encoded.charAt(i) == '%');
623 if( 0 < bufI ) {
624 appendUTF8(result, buf, bufI);
625 }
626 } else {
627 result.append(c);
628 i++;
629 }
630 }
631 return result.toString();
632 }
633 private static void appendUTF8(final StringBuilder sb, final byte[] buf, final int count) {
634 try {
635 sb.append(new String(buf, 0, count, ENCODING));
636 } catch (final UnsupportedEncodingException e) {
637 throw new RuntimeException(MSG_ENCODING_NA, e);
638 }
639 }
640
641 /**
642 * Creates a new Uri instance using the given unencoded arguments.
643 * <p>
644 * This constructor first creates a temporary Uri string from the given unencoded components. This
645 * string will be parsed later on to create the Uri instance.
646 * </p>
647 * <p>
648 * {@code [scheme:]scheme-specific-part[#fragment]}
649 * </p>
650 * <p>
651 * {@code host} and {@code port} <i>may</i> be undefined or invalid within {@code scheme-specific-part}.
652 * </p>
653 *
654 * @param scheme the unencoded scheme part of the Uri.
655 * @param ssp the unencoded scheme-specific-part of the Uri.
656 * @param fragment the unencoded fragment part of the Uri.
657 * @throws URISyntaxException
658 * if the temporary created string doesn't fit to the
659 * specification RFC2396 or could not be parsed correctly.
660 */
661 public static Uri create(final String scheme, final String ssp, final String fragment) throws URISyntaxException {
662 if ( emptyString(scheme) && emptyString(ssp) && emptyString(fragment) ) {
663 throw new URISyntaxException("", "all empty parts");
664 }
665 final StringBuilder uri = new StringBuilder();
666 if ( !emptyString(scheme) ) {
667 uri.append(scheme);
668 uri.append(SCHEME_SEPARATOR);
669 }
670 if ( !emptyString(ssp) ) {
671 // QUOTE ILLEGAL CHARACTERS
672 uri.append(encode(ssp, SSP_LEGAL));
673 }
674 if ( !emptyString(fragment) ) {
675 uri.append(FRAGMENT_SEPARATOR);
676 // QUOTE ILLEGAL CHARACTERS
677 uri.append(encode(fragment, FRAG_LEGAL));
678 }
679 return new Uri(new Encoded(uri.toString()), false, 0);
680 }
681
682 /**
683 * Creates a new Uri instance using the given encoded arguments.
684 * <p>
685 * This constructor first creates a temporary Uri string from the given encoded components. This
686 * string will be parsed later on to create the Uri instance.
687 * </p>
688 * <p>
689 * The given encoded components are taken as-is, i.e. no re-encoding will be performed!
690 * However, Uri parsing will re-evaluate encoding of the resulting components.
691 * </p>
692 * <p>
693 * {@code [scheme:]scheme-specific-part[#fragment]}
694 * </p>
695 * <p>
696 * {@code host} and {@code port} <i>may</i> be undefined or invalid within {@code scheme-specific-part}.
697 * </p>
698 *
699 * @param scheme the encoded scheme part of the Uri.
700 * @param ssp the encoded scheme-specific-part of the Uri.
701 * @param fragment the encoded fragment part of the Uri.
702 * @throws URISyntaxException
703 * if the temporary created string doesn't fit to the
704 * specification RFC2396 or could not be parsed correctly.
705 */
706 public static Uri create(final Encoded scheme, final Encoded ssp, final Encoded fragment) throws URISyntaxException {
707 if ( emptyString(scheme) && emptyString(ssp) && emptyString(fragment) ) {
708 throw new URISyntaxException("", "all empty parts");
709 }
710 final StringBuilder uri = new StringBuilder();
711 if ( !emptyString(scheme) ) {
712 uri.append(scheme);
713 uri.append(SCHEME_SEPARATOR);
714 }
715 if ( !emptyString(ssp) ) {
716 uri.append(ssp.get());
717 }
718 if ( !emptyString(fragment) ) {
719 uri.append(FRAGMENT_SEPARATOR);
720 uri.append(fragment.get());
721 }
722 return new Uri(new Encoded(uri.toString()), false, 0);
723 }
724
725 /**
726 * Creates a new Uri instance using the given unencoded arguments.
727 * <p>
728 * This constructor first creates a temporary Uri string from the given unencoded components. This
729 * string will be parsed later on to create the Uri instance.
730 * </p>
731 * <p>
732 * {@code [scheme:][user-info@]host[:port][path][?query][#fragment]}
733 * </p>
734 * <p>
735 * {@code host} and {@code port} <i>must</i> be defined and valid, if any {@code authority} components are defined,
736 * i.e. {@code user-info}, {@code host} or {@code port}.
737 * </p>
738 *
739 * @param scheme the unencoded scheme part of the Uri.
740 * @param userinfo the unencoded user information of the Uri for authentication and authorization, {@code null} for undefined.
741 * @param host the unencoded host name of the Uri, {@code null} for undefined.
742 * @param port the port number of the Uri, -1 for undefined.
743 * @param path the unencoded path to the resource on the host.
744 * @param query the unencoded query part of the Uri to specify parameters for the resource.
745 * @param fragment the unencoded fragment part of the Uri.
746 * @throws URISyntaxException
747 * if the temporary created string doesn't fit to the
748 * specification RFC2396 or could not be parsed correctly.
749 */
750 public static Uri create (final String scheme, final String userinfo, String host, final int port,
751 final String path, final String query, final String fragment) throws URISyntaxException {
752 if ( emptyString(scheme) && emptyString(userinfo) && emptyString(host) && emptyString(path) &&
753 emptyString(query) && emptyString(fragment) ) {
754 throw new URISyntaxException("", "all empty parts");
755 }
756
757 if ( !emptyString(scheme) && !emptyString(path) && path.length() > 0 && path.charAt(0) != '/') {
758 throw new URISyntaxException(path, "path doesn't start with '/'");
759 }
760
761 final StringBuilder uri = new StringBuilder();
762 if ( !emptyString(scheme) ) {
763 uri.append(scheme);
764 uri.append(SCHEME_SEPARATOR);
765 }
766
767 if ( !emptyString(userinfo) || !emptyString(host) || port != -1) {
768 uri.append("//");
769 }
770
771 if ( !emptyString(userinfo) ) {
772 // QUOTE ILLEGAL CHARACTERS in userinfo
773 uri.append(encode(userinfo, USERINFO_LEGAL));
774 uri.append('@');
775 }
776
777 if ( !emptyString(host) ) {
778 // check for ipv6 addresses that hasn't been enclosed
779 // in square brackets
780 if (host.indexOf(SCHEME_SEPARATOR) != -1 && host.indexOf(']') == -1
781 && host.indexOf('[') == -1) {
782 host = "[" + host + "]";
783 }
784 uri.append(host);
785 }
786
787 if ( port != -1 ) {
788 uri.append(SCHEME_SEPARATOR);
789 uri.append(port);
790 }
791
792 if ( !emptyString(path) ) {
793 // QUOTE ILLEGAL CHARS
794 uri.append(encode(path, PATH_LEGAL));
795 }
796
797 if ( !emptyString(query) ) {
798 uri.append(QUERY_SEPARATOR);
799 // QUOTE ILLEGAL CHARS
800 uri.append(encode(query, QUERY_LEGAL));
801 }
802
803 if ( !emptyString(fragment) ) {
804 // QUOTE ILLEGAL CHARS
805 uri.append(FRAGMENT_SEPARATOR);
806 uri.append(encode(fragment, FRAG_LEGAL));
807 }
808 return new Uri(new Encoded(uri.toString()), true, 0);
809 }
810
811 /**
812 * Creates a new Uri instance using the given encoded arguments.
813 * <p>
814 * This constructor first creates a temporary Uri string from the given encoded components. This
815 * string will be parsed later on to create the Uri instance.
816 * </p>
817 * <p>
818 * The given encoded components are taken as-is, i.e. no re-encoding will be performed!
819 * However, Uri parsing will re-evaluate encoding of the resulting components.
820 * </p>
821 * <p>
822 * {@code [scheme:][user-info@]host[:port][path][?query][#fragment]}
823 * </p>
824 * <p>
825 * {@code host} and {@code port} <i>must</i> be defined and valid, if any {@code authority} components are defined,
826 * i.e. {@code user-info}, {@code host} or {@code port}.
827 * </p>
828 *
829 * @param scheme the encoded scheme part of the Uri.
830 * @param userinfo the encoded user information of the Uri for authentication and authorization, {@code null} for undefined.
831 * @param host the encoded host name of the Uri, {@code null} for undefined.
832 * @param port the port number of the Uri, -1 for undefined.
833 * @param path the encoded path to the resource on the host.
834 * @param query the encoded query part of the Uri to specify parameters for the resource.
835 * @param fragment the encoded fragment part of the Uri.
836 * @throws URISyntaxException
837 * if the temporary created string doesn't fit to the
838 * specification RFC2396 or could not be parsed correctly.
839 */
840 public static Uri create (final Encoded scheme, final Encoded userinfo, final Encoded host, final int port,
841 final Encoded path, final Encoded query, final Encoded fragment) throws URISyntaxException {
842 if ( emptyString(scheme) && emptyString(userinfo) && emptyString(host) && emptyString(path) &&
843 emptyString(query) && emptyString(fragment) ) {
844 throw new URISyntaxException("", "all empty parts");
845 }
846
847 if ( !emptyString(scheme) && !emptyString(path) && path.length() > 0 && path.charAt(0) != '/') {
848 throw new URISyntaxException(path.get(), "path doesn't start with '/'");
849 }
850
851 final StringBuilder uri = new StringBuilder();
852 if ( !emptyString(scheme) ) {
853 uri.append(scheme);
854 uri.append(SCHEME_SEPARATOR);
855 }
856
857 if ( !emptyString(userinfo) || !emptyString(host) || port != -1) {
858 uri.append("//");
859 }
860
861 if ( !emptyString(userinfo) ) {
862 uri.append(userinfo.get());
863 uri.append('@');
864 }
865
866 if ( !emptyString(host) ) {
867 uri.append(host.get());
868 }
869
870 if ( port != -1 ) {
871 uri.append(SCHEME_SEPARATOR);
872 uri.append(port);
873 }
874
875 if ( !emptyString(path) ) {
876 uri.append(path.get());
877 }
878
879 if ( !emptyString(query) ) {
880 uri.append(QUERY_SEPARATOR);
881 uri.append(query.get());
882 }
883
884 if ( !emptyString(fragment) ) {
885 uri.append(FRAGMENT_SEPARATOR);
886 uri.append(fragment.get());
887 }
888 return new Uri(new Encoded(uri.toString()), true, 0);
889 }
890
891 /**
892 * Creates a new Uri instance using the given unencoded arguments.
893 * <p>
894 * This constructor first creates a temporary Uri string from the given unencoded components. This
895 * string will be parsed later on to create the Uri instance.
896 * </p>
897 * <p>
898 * {@code [scheme:]host[path][#fragment]}
899 * </p>
900 * <p>
901 * {@code host} <i>must</i> be valid, if defined.
902 * </p>
903 *
904 * @param scheme the unencoded scheme part of the Uri.
905 * @param host the unencoded host name of the Uri.
906 * @param path the unencoded path to the resource on the host.
907 * @param fragment the unencoded fragment part of the Uri.
908 * @throws URISyntaxException
909 * if the temporary created string doesn't fit to the
910 * specification RFC2396 or could not be parsed correctly.
911 */
912 public static Uri create(final String scheme, final String host, final String path, final String fragment) throws URISyntaxException {
913 return create(scheme, null, host, -1, path, null, fragment);
914 }
915
916 /**
917 * Creates a new Uri instance using the given encoded arguments.
918 * <p>
919 * This constructor first creates a temporary Uri string from the given encoded components. This
920 * string will be parsed later on to create the Uri instance.
921 * </p>
922 * <p>
923 * The given encoded components are taken as-is, i.e. no re-encoding will be performed!
924 * However, Uri parsing will re-evaluate encoding of the resulting components.
925 * </p>
926 * <p>
927 * {@code [scheme:]host[path][#fragment]}
928 * </p>
929 * <p>
930 * {@code host} <i>must</i> be valid, if defined.
931 * </p>
932 *
933 * @param scheme the encoded scheme part of the Uri.
934 * @param host the encoded host name of the Uri.
935 * @param path the encoded path to the resource on the host.
936 * @param fragment the encoded fragment part of the Uri.
937 * @throws URISyntaxException
938 * if the temporary created string doesn't fit to the
939 * specification RFC2396 or could not be parsed correctly.
940 */
941 public static Uri create(final Encoded scheme, final Encoded host, final Encoded path, final Encoded fragment) throws URISyntaxException {
942 return create(scheme, null, host, -1, path, null, fragment);
943 }
944
945 /**
946 * Creates a new Uri instance using the given unencoded arguments.
947 * <p>
948 * This constructor first creates a temporary Uri string from the given unencoded components. This
949 * string will be parsed later on to create the Uri instance.
950 * </p>
951 * <p>
952 * {@code [scheme:][//authority][path][?query][#fragment]}
953 * </p>
954 * <p>
955 * {@code host} and {@code port} <i>may</i> be undefined or invalid, in the optional {@code authority}.
956 * </p>
957 *
958 * @param scheme the unencoded scheme part of the Uri.
959 * @param authority the unencoded authority part of the Uri.
960 * @param path the unencoded path to the resource on the host.
961 * @param query the unencoded query part of the Uri to specify parameters for the resource.
962 * @param fragment the unencoded fragment part of the Uri.
963 *
964 * @throws URISyntaxException
965 * if the temporary created string doesn't fit to the
966 * specification RFC2396 or could not be parsed correctly.
967 */
968 public static Uri create(final String scheme, final String authority, final String path, final String query, final String fragment) throws URISyntaxException {
969 if ( emptyString(scheme) && emptyString(authority) && emptyString(path) &&
970 emptyString(query) && emptyString(fragment) ) {
971 throw new URISyntaxException("", "all empty parts");
972 }
973 if ( !emptyString(scheme) && !emptyString(path) && path.length() > 0 && path.charAt(0) != '/') {
974 throw new URISyntaxException(path, "path doesn't start with '/'");
975 }
976
977 final StringBuilder uri = new StringBuilder();
978 if ( !emptyString(scheme) ) {
979 uri.append(scheme);
980 uri.append(SCHEME_SEPARATOR);
981 }
982 if ( !emptyString(authority) ) {
983 uri.append("//");
984 // QUOTE ILLEGAL CHARS
985 uri.append(encode(authority, AUTHORITY_LEGAL));
986 }
987
988 if ( !emptyString(path) ) {
989 // QUOTE ILLEGAL CHARS
990 uri.append(encode(path, PATH_LEGAL));
991 }
992 if ( !emptyString(query) ) {
993 // QUOTE ILLEGAL CHARS
994 uri.append(QUERY_SEPARATOR);
995 uri.append(encode(query, QUERY_LEGAL));
996 }
997 if ( !emptyString(fragment) ) {
998 // QUOTE ILLEGAL CHARS
999 uri.append(FRAGMENT_SEPARATOR);
1000 uri.append(encode(fragment, FRAG_LEGAL));
1001 }
1002 return new Uri(new Encoded(uri.toString()), false, 0);
1003 }
1004
1005 /**
1006 * Creates a new Uri instance using the given encoded arguments.
1007 * <p>
 * This constructor first creates a temporary Uri string from the given encoded components. This
1009 * string will be parsed later on to create the Uri instance.
1010 * </p>
1011 * <p>
1012 * The given encoded components are taken as-is, i.e. no re-encoding will be performed!
1013 * However, Uri parsing will re-evaluate encoding of the resulting components.
1014 * </p>
1015 * <p>
1016 * {@code [scheme:][//authority][path][?query][#fragment]}
1017 * </p>
1018 * <p>
1019 * {@code host} and {@code port} <i>may</i> be undefined or invalid, in the optional {@code authority}.
1020 * </p>
1021 *
1022 * @param scheme the encoded scheme part of the Uri.
1023 * @param authority the encoded authority part of the Uri.
1024 * @param path the encoded path to the resource on the host.
1025 * @param query the encoded query part of the Uri to specify parameters for the resource.
1026 * @param fragment the encoded fragment part of the Uri.
1027 *
1028 * @throws URISyntaxException
1029 * if the temporary created string doesn't fit to the
1030 * specification RFC2396 or could not be parsed correctly.
1031 */
1032 public static Uri create(final Encoded scheme, final Encoded authority, final Encoded path, final Encoded query, final Encoded fragment) throws URISyntaxException {
1033 if ( emptyString(scheme) && emptyString(authority) && emptyString(path) &&
1034 emptyString(query) && emptyString(fragment) ) {
1035 throw new URISyntaxException("", "all empty parts");
1036 }
1037 if ( !emptyString(scheme) && !emptyString(path) && path.length() > 0 && path.charAt(0) != '/') {
1038 throw new URISyntaxException(path.get(), "path doesn't start with '/'");
1039 }
1040
1041 final StringBuilder uri = new StringBuilder();
1042 if ( !emptyString(scheme) ) {
1043 uri.append(scheme);
1044 uri.append(SCHEME_SEPARATOR);
1045 }
1046 if ( !emptyString(authority) ) {
1047 uri.append("//");
1048 uri.append(authority.get());
1049 }
1050
1051 if ( !emptyString(path) ) {
1052 uri.append(path.get());
1053 }
1054 if ( !emptyString(query) ) {
1055 uri.append(QUERY_SEPARATOR);
1056 uri.append(query.get());
1057 }
1058 if ( !emptyString(fragment) ) {
1059 uri.append(FRAGMENT_SEPARATOR);
1060 uri.append(fragment.get());
1061 }
1062 return new Uri(new Encoded(uri.toString()), false, 0);
1063 }
1064
1065 /**
1066 * Casts the given encoded String to a {@link Encoded#cast(String) new Encoded instance}
1067 * used to create the resulting Uri instance via {@link #Uri(Encoded)}.
1068 * <p>
1069 * No encoding will be performed on the given {@code encodedUri}, use with care.
1070 * </p>
1071 * @throws URISyntaxException
1072 */
1073 public static Uri cast(final String encodedUri) throws URISyntaxException {
1074 return new Uri(Encoded.cast(encodedUri));
1075 }
1076
1077 /**
1078 * Creates a new Uri instance using the given file-path argument.
1079 * <p>
1080 * This constructor first creates a temporary Uri string from the given components. This
1081 * string will be parsed later on to create the Uri instance.
1082 * </p>
1083 * <p>
1084 * {@code file:path}
1085 * </p>
1086 *
1087 * @param path the unencoded path of the {@code file} {@code schema}.
1088 * @throws URISyntaxException
1089 * if the temporary created string doesn't fit to the
1090 * specification RFC2396 or could not be parsed correctly.
1091 */
1092 public static Uri valueOfFilepath(final String path) throws URISyntaxException {
1093 if ( emptyString(path) ) {
1094 throw new URISyntaxException("", "empty path");
1095 }
1096 if ( path.charAt(0) != '/' ) {
1097 throw new URISyntaxException(path, "path doesn't start with '/'");
1098 }
1099
1100 final StringBuilder uri = new StringBuilder();
1101 uri.append(FILE_SCHEME);
1102 uri.append(SCHEME_SEPARATOR);
1103
1104 // QUOTE ILLEGAL CHARS
1105 uri.append(encode(path, PATH_LEGAL));
1106
1107 return new Uri(new Encoded(uri.toString()), false, 0);
1108 }
1109
1110 /**
1111 * Creates a new Uri instance using the given File instance.
1112 * <p>
1113 * This constructor first creates a temporary Uri string from the given components. This
1114 * string will be parsed later on to create the Uri instance.
1115 * </p>
1116 * <p>
1117 * {@code file:path}
1118 * </p>
1119 *
1120 * @param file using {@link IOUtil#slashify(String, boolean, boolean) slashified} {@link File#getAbsolutePath() absolute-path}
1121 * for the path of the {@code file} {@code schema}, utilizing {@link #valueOfFilepath(String)}.
1122 * @throws URISyntaxException
1123 * if the temporary created string doesn't fit to the
1124 * specification RFC2396 or could not be parsed correctly.
1125 */
1126 public static Uri valueOf(final File file) throws URISyntaxException {
1127 return Uri.valueOfFilepath(Util.slashify(file.getAbsolutePath(), true, file.isDirectory()));
1128 }
1129
1130 /**
1131 * Creates a new Uri instance using the given URI instance.
1132 * <p>
1133 * Re-encoding will be performed if the given URI is {@link URI#isOpaque() not opaque}.
1134 * </p>
1135 * <p>
1136 * See {@link #PARSE_HINT_FIX_PATH} for issues of injecting opaque URLs.
1137 * </p>
1138 *
1139 * @param uri A given URI instance
1140 * @throws URISyntaxException
1141 * if the temporary created string doesn't fit to the
1142 * specification RFC2396 or could not be parsed correctly.
1143 */
1144 public static Uri valueOf(final java.net.URI uri) throws URISyntaxException {
1145 if( uri.isOpaque()) {
1146 // opaque, without host validation.
1147 // Note: This may induce encoding errors of authority and path, see {@link #PARSE_HINT_FIX_PATH}
1148 return new Uri(new Encoded( uri.toString() ), false, 0);
1149 } else {
1150 // with host validation if authority is defined
1151 return Uri.create(uri.getScheme(), uri.getUserInfo(), uri.getHost(), uri.getPort(),
1152 uri.getPath(), uri.getQuery(), uri.getFragment());
1153 }
1154 }
1155
1156 /**
1157 * Creates a new Uri instance using the given URL instance,
1158 * convenient wrapper for {@link #valueOf(URI)} and {@link URL#toURI()}.
1159 * <p>
1160 * Re-encoding will be performed if the given URL is {@link URI#isOpaque() not opaque}, see {@link #valueOf(URI)}.
1161 * </p>
1162 * <p>
1163 * See {@link #PARSE_HINT_FIX_PATH} for issues of injecting opaque URLs.
1164 * </p>
1165 *
1166 * @param url A given URL instance
1167 *
1168 * @throws URISyntaxException
1169 * if the temporary created string doesn't fit to the
1170 * specification RFC2396 or could not be parsed correctly.
1171 */
1172 public static Uri valueOf(final java.net.URL url) throws URISyntaxException {
1173 return valueOf(url.toURI());
1174 }
1175
1176 //
1177 // All string fields are encoded!
1178 //
1179
1180 /** Encoded input string used at construction, never {@code null}. */
1181 public final Encoded input;
1182
1183 private final Object lazyLock = new Object();
1184
1185 /** Encoded input string used at construction, in US-ASCII encoding. */
1186 private ASCIIEncoded inputASCII;
1187
1188 private int hash;
1189
1190 /** Encoded {@code scheme}, {@code null} if undefined. */
1191 public final Encoded scheme;
1192
1193 /** Encoded {@code scheme-specific-part}, never {@code null}. */
1195 /** Encoded {@code path} part of {@code scheme-specific-part}, never {@code null}. */
1196 public final Encoded path;
1197
1198 /** Indicating whether {@code authority} part is defined or not. */
1199 public final boolean hasAuthority;
1200 /** Encoded {@code authority} part of {@code scheme-specific-part}, {@code null} if undefined. */
1201 public final Encoded authority;
1202 /** Encoded {@code userinfo} part of {@code authority} and {@code scheme-specific-part}, {@code null} if undefined. */
1203 public final Encoded userInfo; // part of authority
1204 /** Encoded {@code host} part of {@code authority} and {@code scheme-specific-part}, {@code null} if undefined. */
1205 public final Encoded host; // part of authority
1206 /** Encoded {@code port} part of {@code authority} and {@code scheme-specific-part}, {@code -1} if undefined. */
1207 public final int port; // part of authority
1208
1209 /** Encoded {@code query} part of {@code scheme-specific-part}, {@code null} if undefined. */
1210 public final Encoded query;
1211
1212 /** Encoded {@code fragment}, {@code null} if undefined. */
1213 public final Encoded fragment;
1214
1215 /** Indicating whether this Uri is absolute, i.e. has a {@code scheme} and hence an absolute {@code scheme-specific-part}. */
1216 public final boolean absolute;
1217
1218 /**
1219 * Indicating whether this Uri is opaque, i.e. non-hierarchical {@code scheme-specific-part}.
1220 * <p>
1221 * An opaque Uri has no {@code scheme-specific-part} being parsed,
1222 * i.e. {@code path}, {@code query} and {@code authority} are {@code null}.
1223 * </p>
1224 */
1225 public final boolean opaque;
1226
1227 /**
1228 * Creates a new Uri instance according to the given encoded string {@code uri}.
1229 *
1230 * @param uri the RFC3986 encoded RFC2396 Uri representation to be parsed into a Uri object
1231 * @throws URISyntaxException
1232 * if the given string {@code uri} doesn't fit to the
1233 * specification RFC2396 and RFC3986 or could not be parsed correctly.
1234 */
1235 public Uri(final Encoded uri) throws URISyntaxException {
// Delegates to the three-argument constructor using no parse hints (0).
// NOTE(review): the second argument 'false' presumably disables server/host validation - confirm against that constructor.
1236 this(uri, false, 0);
1237 }
1238
1239 /** Returns true, if this instance is a {@code file} {@code scheme}, otherwise false. */
1240 public final boolean isFileScheme() {
1241 return null != scheme && FILE_SCHEME.equals( scheme.get() );
1242 }
1243
1244 /**
1245 * Returns true, if this instance is a {@code jar} {@code scheme}, otherwise false.
1246 * @since 0.3.0
1247 */
1248 public final boolean isJarScheme() {
1249 return null != scheme && JAR_SCHEME.equals( scheme.get() );
1250 }
1251
1252 /**
1253 * Returns the encoded {@link #input}, never {@code null}.
1254 */
1255 public final Encoded getEncoded() {
1256 return input;
1257 }
1258
1259 /**
1260 * Returns the encoded {@link #input} as String, never {@code null}, same as {@link #getEncoded()}.
1261 */
1262 @Override
1263 public final String toString() {
1264 return input.get();
1265 }
1266
1267 /**
1268 * Returns the encoded {@link #input} encoded in US-ASCII.
1269 */
1271 synchronized( lazyLock ) {
1272 if( null == inputASCII ) {
1273 inputASCII = new ASCIIEncoded(input.get());
1274 }
1275 return inputASCII;
1276 }
1277 }
1278
1279 /**
1280 * Returns a new {@link URI} instance using the encoded {@link #input} string, {@code new URI(uri.input)},
1281 * i.e. no re-encoding will be performed.
1282 * @see #toURIReencoded(boolean)
1283 * @see #valueOf(URI)
1284 */
1285 public final java.net.URI toURI() {
1286 try {
1287 return new java.net.URI(input.get());
1288 } catch (final URISyntaxException e) {
1289 throw new Error(e); // Can't happen
1290 }
1291 }
1292
1293 /**
1294 * Returns a new {@link URI} instance based upon this instance.
1295 * <p>
1296 * All Uri parts of this instance will be decoded
1297 * and encoded by the URI constructor, i.e. re-encoding will be performed.
1298 * </p>
1299 *
1300 * @throws URISyntaxException
1301 * if the given string {@code uri} doesn't fit to the
1302 * specification RFC2396 or could not be parsed correctly.
1303 * @see #toURI()
1304 * @see #valueOf(URI)
1305 */
1306 public final java.net.URI toURIReencoded() throws URISyntaxException {
1307 final java.net.URI recomposedURI;
1308 if( opaque ) {
1309 // opaque, without host validation
1310 recomposedURI = new java.net.URI(decode(scheme), decode(schemeSpecificPart), decode(fragment));
1311 } else if( null != host ) {
1312 // with host validation
1313 recomposedURI = new java.net.URI(decode(scheme), decode(userInfo), decode(host), port,
1315 } else {
1316 // without host validation
1317 recomposedURI = new java.net.URI(decode(scheme), decode(authority),
1319 }
1320 return recomposedURI;
1321 }
1322
1323
1324 /**
1325 * Returns a new {@link URL} instance using the encoded {@link #input} string, {@code new URL(uri.input)},
1326 * i.e. no re-encoding will be performed.
1327 * @throws MalformedURLException
1328 * if an error occurs while creating the URL or no protocol
1329 * handler could be found.
1330 */
1331 public final java.net.URL toURL() throws MalformedURLException {
1332 if (!absolute) {
1333 throw new IllegalArgumentException("Cannot convert relative Uri: "+input);
1334 }
1335 return new java.net.URL(input.get());
1336 }
1337
1338 /**
1339 * If this instance {@link #isFileScheme() is a file scheme},
1340 * implementation decodes <i>[ "//"+{@link #authority} ] + {@link #path}</i>,<br>
1341 * then it processes the result if {@link File#separatorChar} <code> == '\\'</code>
1342 * as follows:
1343 * <ul>
1344 * <li>slash -> backslash</li>
1345 * <li>drop a starting single backslash, preserving windows UNC</li>
1346 * </ul>
1347 * and returns the resulting new {@link File} instance.
1348 * <p>
1349 * Otherwise implementation returns {@code null}.
1350 * </p>
1351 */
1352 public final File toFile() {
1353 if( isFileScheme() && !emptyString(path) ) {
1354 final String authorityS;
1355 if( null == authority ) {
1356 authorityS = "";
1357 } else {
1358 authorityS = "//"+authority.decode();
1359 }
1360 final String path = authorityS+this.path.decode();
1361 if( File.separator.equals("\\") ) {
1362 final String r = patternSingleFS.matcher(path).replaceAll("\\\\");
1363 if( r.startsWith("\\") && !r.startsWith("\\\\") ) { // '\\\\' denotes UNC hostname, which shall not be cut-off
1364 return new File(r.substring(1));
1365 } else {
1366 return new File(r);
1367 }
1368 }
1369 return new File(path);
1370 }
1371 return null;
1372 }
1373
1374 /**
1375 * If this <code>uri</code> is a <i>file scheme</i>
1376 * implementation returns {@link #toFile()}.{@link File#getPath()}.
1377 * <p>
1378 * Otherwise it returns the {@link #toASCIIString()} encoded URI.
1379 * </p>
1380 */
1381 public final String getUriFilePathOrASCII() {
1382 if( isFileScheme() ) {
1383 return toFile().getPath();
1384 } else {
1385 return toASCIIString().get();
1386 }
1387 }
1388
1389 /**
1390 * If this instance's {@link #schemeSpecificPart} contains a Uri itself, a sub-Uri,
 * return {@link #schemeSpecificPart} + {@code #} {@link #fragment} via its own new Uri instance.
1392 * <p>
1393 * In case this Uri is a {@code jar-scheme}, the {@code query} is omitted,
1394 * since 0.3.0
1395 * </p>
1396 * <p>
1397 * Otherwise method returns {@code null}.
1398 * </p>
1399 * <pre>
1400 * Example 1:
1401 * This instance: <code>jar:<i>scheme2</i>:/some/path/gluegen-rt.jar!/com/jogamp/common/GlueGenVersion.class</code>
1402 * Returned Uri: <code><i>scheme2</i>:/some/path/gluegen-rt.jar</code>
1403 *
1404 * Example 2:
1405 * This instance: <code>jar:<i>scheme2</i>:/some/path/gluegen-rt.jar!/com/jogamp/common/GlueGenVersion.class?lala=01#fragment</code>
1406 * Returned Uri: <code><i>scheme2</i>:/some/path/gluegen-rt.jar#fragment</code>
1407 *
1408 * Example 3:
1409 * This instance: <code>scheme1:<i>scheme2</i>:/some/path/gluegen-rt.jar!/?lala=01#fragment</code>
1410 * Returned Uri: <code><i>scheme2</i>:/some/path/gluegen-rt.jar?lala=01#fragment</code>
1411 * </pre>
1412 * @throws URISyntaxException if this Uri is a container Uri and does not comply with the container spec, i.e. a JAR Uri
1413 */
1414 public final Uri getContainedUri() throws URISyntaxException {
1415 if( !emptyString(schemeSpecificPart) ) {
1416 final StringBuilder sb = new StringBuilder();
1417
1418 if( isJarScheme() ) {
1420 if (0 > idx) {
1421 throw new URISyntaxException(input.get(), "missing jar separator");
1422 }
1423 sb.append( schemeSpecificPart.get().substring(0, idx) ); // exclude '!/'
1424 } else {
1425 sb.append( schemeSpecificPart.get() );
1426 }
1427 if ( !emptyString(fragment) ) {
1428 sb.append(FRAGMENT_SEPARATOR);
1429 sb.append(fragment);
1430 }
1431 try {
1432 final int parseHints = opaque ? PARSE_HINT_FIX_PATH : 0;
1433 final Uri res = new Uri(new Encoded(sb.toString()), false, parseHints);
1434 if( null != res.scheme ) {
1435 return res;
1436 }
1437 } catch(final URISyntaxException e) {
1438 // OK, does not contain uri
1439 if( DEBUG ) {
1440 System.err.println("Caught "+e.getClass().getSimpleName()+": "+e.getMessage());
1441 e.printStackTrace();
1442 }
1443 }
1444 }
1445 return null;
1446 }
1447
1448 private static final boolean cutoffLastPathSegementImpl(final StringBuilder pathBuf,
1449 final boolean cutoffFile,
1450 final boolean cutoffDir,
1451 final Encoded appendPath) throws URISyntaxException {
1452 final boolean cleaned;
1453 {// clean-up existing path
1454 final String pathS = pathBuf.toString();
1455 if( 0 > pathS.indexOf("/") && emptyString(appendPath) ) {
1456 return false; // nothing to cut-off
1457 }
1458 pathBuf.setLength(0);
1459 pathBuf.append( Util.cleanPathString( pathS ) );
1460 cleaned = pathBuf.length() != pathS.length();
1461 }
1462
1463 {// cut-off file or last dir-segment
1464 final String pathS = pathBuf.toString();
1465 final int jarSepIdx = pathS.lastIndexOf(JAR_SCHEME_SEPARATOR);
1466 final int e = pathS.lastIndexOf("/");
1467 if( 0 > jarSepIdx || e - 1 > jarSepIdx ) { // stop at jar-separator '!/', if exist
1468 if( cutoffFile && e < pathS.length() - 1 ) {
1469 // cut-off file
1470 pathBuf.setLength(0);
1471 pathBuf.append( pathS.substring(0, e+1) );
1472 } else if( cutoffDir ) {
1473 // cut-off dir-segment
1474 final int p = pathS.lastIndexOf("/", e-1);
1475 if( p >= 0 ) {
1476 pathBuf.setLength(0);
1477 pathBuf.append( pathS.substring(0, p+1) );
1478 } // else keep
1479 } // else keep
1480 }
1481 final boolean cutoff = pathBuf.length() != pathS.length();
1482 if( !cutoff && ( cutoffDir || !cleaned ) && emptyString(appendPath) ) {
1483 return false; // no modifications!
1484 }
1485 }
1486 if( !emptyString(appendPath) ) {
1487 pathBuf.append(appendPath.get());
1488 // 2nd round of cleaning!
1489 final String pathS = pathBuf.toString();
1490 pathBuf.setLength(0);
1491 pathBuf.append( Util.cleanPathString( pathS ) );
1492 }
1493 return true; // continue processing w/ buffer
1494 }
1495 private final Uri cutoffLastPathSegementImpl(final boolean cutoffFile, final boolean cutoffDir, final Encoded appendPath) throws URISyntaxException {
1496 if( opaque ) {
1497 if( emptyString(schemeSpecificPart) ) {
1498 // nothing to cut-off
1499 if( !emptyString(appendPath) ) {
1500 return Uri.create(scheme, appendPath, fragment);
1501 } else {
1502 return null;
1503 }
1504 }
1505 final StringBuilder sspBuf = new StringBuilder(); // without path!
1506
1507 // save optional query in scheme-specific-part
1508 final Encoded queryTemp;
1510 if( queryI >= 0 ) {
1511 queryTemp = schemeSpecificPart.substring(queryI+1);
1512 sspBuf.append( schemeSpecificPart.substring(0, queryI).get() );
1513 } else {
1514 queryTemp = null;
1515 sspBuf.append( schemeSpecificPart.get() );
1516 }
1517
1518 if( !cutoffLastPathSegementImpl(sspBuf, cutoffFile, cutoffDir, appendPath) ) {
1519 return null; // no modifications
1520 }
1521
1522 if ( !emptyString(queryTemp) ) {
1523 sspBuf.append(QUERY_SEPARATOR);
1524 sspBuf.append( queryTemp.get() );
1525 }
1526
1527 // without host validation if authority is defined
1528 return Uri.create(scheme, new Encoded(sspBuf.toString()), fragment);
1529 } else {
1530 if( emptyString(path) ) {
1531 return null; // nothing to cut-off
1532 }
1533 final StringBuilder pathBuf = new StringBuilder();
1534 pathBuf.append( path.get() );
1535
1536 if( !cutoffLastPathSegementImpl(pathBuf, cutoffFile, cutoffDir, appendPath) ) {
1537 return null; // no modifications
1538 }
1539
1540 // with host validation if authority is defined
1541 return Uri.create(scheme, userInfo, host, port, new Encoded(pathBuf.toString()), query, fragment);
1542 }
1543 }
1544
1545 /**
1546 * {@link IOUtil#cleanPathString(String) Normalizes} this Uri's path and return the
1547 * {@link IOUtil#cleanPathString(String) normalized} form if it differs, otherwise {@code this} instance.
1548 * <p>
1549 * <pre>
1550 * Example-1:
1551 * This instance : <code>jar:http://some/path/../gluegen-rt.jar!/com/Test.class?arg=1#frag</code>
1552 * Normalized : <code>jar:http://some/gluegen-rt.jar!/com/Test.class?arg=1#frag</code>
1553 *
1554 * Example-2:
1555 * This instance : <code>http://some/path/../gluegen-rt.jar?arg=1#frag</code>
1556 * Normalized : <code>http://some/gluegen-rt.jar?arg=1#frag</code>
1557 * </pre>
1558 * </p>
1559 */
1560 public final Uri getNormalized() {
1561 try {
1562 final Uri res = cutoffLastPathSegementImpl(false, false, null);
1563 return null != res ? res : this;
1564 } catch (final URISyntaxException e) {
1565 if( DEBUG ) {
1566 System.err.println("Caught "+e.getClass().getSimpleName()+": "+e.getMessage());
1567 e.printStackTrace();
1568 }
1569 return this;
1570 }
1571 }
1572
1573 /**
1574 * Returns this Uri's directory Uri.
1575 * <p>
1576 * This Uri path will be {@link IOUtil#cleanPathString(String) normalized} before returning the directory.
1577 * </p>
1578 * <p>
1579 * If this Uri's directory cannot be found, or already denotes a directory, method returns {@code this} instance.
1580 * </p>
1581 * <p>
1582 * <pre>
1583 * Example-1:
1584 * this-uri: http:/some/path/gluegen-rt.jar?arg=1#frag
1585 * result: http:/some/path/?arg=1#frag
1586 *
1587 * Example-2:
1588 * this-uri: file:/some/path/
1589 * result: file:/some/path/
1590 *
1591 * Example-3:
1592 * this-uri: file:/some/path/lala/lili/../../hello.txt
1593 * result: file:/some/path/
1594 * </pre>
1595 * </p>
1596 * @throws URISyntaxException if the new string {@code uri} doesn't fit to the
1597 * specification RFC2396 and RFC3986 or could not be parsed correctly.
1598 */
1600 try {
1601 final Uri res = cutoffLastPathSegementImpl(true, false, null);
1602 return null != res ? res : this;
1603 } catch (final URISyntaxException e) {
1604 if( DEBUG ) {
1605 System.err.println("Caught "+e.getClass().getSimpleName()+": "+e.getMessage());
1606 e.printStackTrace();
1607 }
1608 return this;
1609 }
1610 }
1611
1612 /**
 * Returns this Uri's parent directory Uri.
1614 * <p>
1615 * This Uri path will be {@link IOUtil#cleanPathString(String) normalized} before traversing up one directory.
1616 * </p>
1617 * <p>
1618 * If a parent folder cannot be found, method returns {@code null}.
1619 * </p>
1620 * <p>
1621 * <pre>
1622 * Example-1:
1623 * This instance : <code>jar:http://some/path/gluegen-rt.jar!/com/Test.class?arg=1#frag</code>
1624 * Returned Uri #1: <code>jar:http://some/path/gluegen-rt.jar!/com/?arg=1#frag</code>
1625 * Returned Uri #2: <code>jar:http://some/path/gluegen-rt.jar!/?arg=1#frag</code>
1626 * Returned Uri #3: <code>null</code>
1627 *
1628 * Example-2:
1629 * This instance : <code>http://some/path/gluegen-rt.jar?arg=1#frag</code>
1630 * Returned Uri #1: <code>http://some/path/?arg=1#frag</code>
1631 * Returned Uri #2: <code>http://some/?arg=1#frag</code>
 * Returned Uri #3: <code>null</code>
1633 *
1634 * Example-3:
1635 * This instance : <code>http://some/path/../gluegen-rt.jar?arg=1#frag</code>
1636 * Returned Uri #1: <code>http://some/?arg=1#frag</code>
1637 * Returned Uri #2: <code>null</code>
1638 * </pre>
1639 * </p>
1640 */
1641 public final Uri getParent() {
1642 try {
1643 return cutoffLastPathSegementImpl(true, true, null);
1644 } catch (final URISyntaxException e) {
1645 if( DEBUG ) {
1646 System.err.println("Caught "+e.getClass().getSimpleName()+": "+e.getMessage());
1647 e.printStackTrace();
1648 }
1649 return null;
1650 }
1651 }
1652
1653 /**
1654 * Returns a new Uri appending the given {@code appendPath}
1655 * to this instance's {@link #getDirectory() directory}.
1656 * <p>
1657 * If {@code appendPath} is empty, method behaves like {@link #getNormalized()}.
1658 * </p>
1659 * <p>
1660 * This resulting path will be {@link IOUtil#cleanPathString(String) normalized}.
1661 * </p>
1662 * <p>
1663 * <pre>
1664 * Example-1:
1665 * append: null
1666 * this-uri: http:/some/path/gluegen-rt.jar
1667 * result: http:/some/path/gluegen-rt.jar
1668 *
1669 * Example-2:
1670 * append: test.txt
1671 * this-uri: file:/some/path/gluegen-rt.jar
1672 * result: file:/some/path/test.txt
1673 *
1674 * Example-3:
1675 * append: test.txt
1676 * this-uri: file:/some/path/lala/lili/../../hello.txt
1677 * result: file:/some/path/test.txt
1678 * </pre>
1679 * </p>
1680 *
1681 * @param appendPath denotes a relative path to be appended to this Uri's directory
1682 * @throws URISyntaxException
1683 * if the resulting {@code uri} doesn't fit to the
1684 * specification RFC2396 and RFC3986 or could not be parsed correctly.
1685 */
1686 public Uri getRelativeOf(final Encoded appendPath) throws URISyntaxException {
1687 if( emptyString(appendPath) ) {
1688 return getNormalized();
1689 } else {
1690 return cutoffLastPathSegementImpl(true, false, appendPath);
1691 }
1692 }
1693
1694 /**
1695 * Concatenates the given encoded string to the {@link #getEncoded() encoded uri}
1696 * of this instance and returns {@link #Uri(Encoded) a new Uri instance} with the result.
1697 *
1698 * @throws URISyntaxException
1699 * if the concatenated string {@code uri} doesn't fit to the
1700 * specification RFC2396 and RFC3986 or could not be parsed correctly.
1701 */
1702 public final Uri concat(final Encoded suffix) throws URISyntaxException {
1703 if( null == suffix ) {
1704 return this;
1705 } else {
1706 return new Uri( input.concat(suffix) );
1707 }
1708 }
1709
1710 /**
1711 * Returns a new Uri instance w/ the given new query {@code newQuery}.
1712 *
1713 * @throws URISyntaxException if this Uri is {@link #opaque}
1714 * or if the new string {@code uri} doesn't fit to the
1715 * specification RFC2396 and RFC3986 or could not be parsed correctly.
1716 */
1717 public final Uri getNewQuery(final Encoded newQuery) throws URISyntaxException {
1718 if( opaque ) {
1719 throw new URISyntaxException(input.decode(), "Opaque Uri cannot permute by query");
1720 } else {
1721 // with host validation if authority is defined
1722 return Uri.create(scheme, userInfo, host, port, path, newQuery, fragment);
1723 }
1724 }
1725
1726 /**
1727 * {@inheritDoc}
1728 * <p>
1729 * Compares this Uri instance with the given argument {@code o} and
1730 * determines if both are equal. Two Uri instances are equal if all single
1731 * parts are identical in their meaning.
1732 * </p>
1733 *
1734 * @param o
1735 * the Uri this instance has to be compared with.
1736 * @return {@code true} if both Uri instances point to the same resource,
1737 * {@code false} otherwise.
1738 */
1739 @Override
1740 public final boolean equals(final Object o) {
1741 if (!(o instanceof Uri)) {
1742 return false;
1743 }
1744 final Uri uri = (Uri) o;
1745
1746 if (uri.fragment == null && fragment != null || uri.fragment != null && fragment == null) {
1747 return false;
1748 } else if (uri.fragment != null && fragment != null) {
1749 if (!equalsHexCaseInsensitive(uri.fragment, fragment)) {
1750 return false;
1751 }
1752 }
1753
1754 if (uri.scheme == null && scheme != null || uri.scheme != null && scheme == null) {
1755 return false;
1756 } else if (uri.scheme != null && scheme != null) {
1757 if (!uri.scheme.equalsIgnoreCase(scheme)) {
1758 return false;
1759 }
1760 }
1761
1762 if (uri.opaque && opaque) {
1763 return equalsHexCaseInsensitive(uri.schemeSpecificPart, schemeSpecificPart);
1764 } else if (!uri.opaque && !opaque) {
1765 if (!equalsHexCaseInsensitive(path, uri.path)) {
1766 return false;
1767 }
1768
1769 if (uri.query != null && query == null || uri.query == null && query != null) {
1770 return false;
1771 } else if (uri.query != null && query != null) {
1772 if (!equalsHexCaseInsensitive(uri.query, query)) {
1773 return false;
1774 }
1775 }
1776
1777 if (uri.authority != null && authority == null || uri.authority == null && authority != null) {
1778 return false;
1779 } else if (uri.authority != null && authority != null) {
1780 if (uri.host != null && host == null || uri.host == null && host != null) {
1781 return false;
1782 } else if (uri.host == null && host == null) {
1783 // both are registry based, so compare the whole authority
1784 return equalsHexCaseInsensitive(uri.authority, authority);
1785 } else { // uri.host != null && host != null, so server-based
1786 if (!host.equalsIgnoreCase(uri.host)) {
1787 return false;
1788 }
1789
1790 if (port != uri.port) {
1791 return false;
1792 }
1793
1794 if ( uri.userInfo != null && userInfo == null ||
1795 uri.userInfo == null && userInfo != null
1796 ) {
1797 return false;
1798 } else if (uri.userInfo != null && userInfo != null) {
1799 return equalsHexCaseInsensitive(userInfo, uri.userInfo);
1800 } else {
1801 return true;
1802 }
1803 }
1804 } else {
1805 // no authority
1806 return true;
1807 }
1808
1809 } else {
1810 // one is opaque, the other hierarchical
1811 return false;
1812 }
1813 }
1814
1815 /**
1816 * {@inheritDoc}
1817 * <p>
1818 * Gets the hashcode value of this Uri instance.
1819 * </p>
1820 */
1821 @Override
1822 public final int hashCode() {
1823 synchronized( lazyLock ) {
1824 if (hash == -1) {
1825 hash = getHashString().hashCode();
1826 }
1827 return hash;
1828 }
1829 }
1830
1831 /*
1832 * Takes a string that may contain hex sequences like %F1 or %2b and
1833 * converts the hex values following the '%' to lowercase
1834 */
1835 private String convertHexToLowerCase(final String s) {
1836 if (s.indexOf('%') == -1) {
1837 return s;
1838 }
1839 final StringBuilder result = new StringBuilder("");
1840 int index = 0, previndex = 0;
1841 while ((index = s.indexOf('%', previndex)) != -1) {
1842 result.append(s.substring(previndex, index + 1));
1843 result.append(s.substring(index + 1, index + 3).toLowerCase());
1844 index += 3;
1845 previndex = index;
1846 }
1847 return result.toString();
1848 }
1849
1850 /*
1851 * Takes two strings that may contain hex sequences like %F1 or %2b and
1852 * compares them, ignoring case for the hex values. Hex values must always
1853 * occur in pairs as above
1854 */
1855 private boolean equalsHexCaseInsensitive(final Encoded first, final Encoded second) {
1856 if (first.indexOf('%') != second.indexOf('%')) {
1857 return first.equals(second);
1858 }
1859
1860 int index = 0, previndex = 0;
1861 while ( ( index = first.indexOf('%', previndex) ) != -1 &&
1862 second.indexOf('%', previndex) == index
1863 ) {
1864 if( !first.get().substring(previndex, index).equals( second.get().substring(previndex, index) ) ) {
1865 return false;
1866 }
1867 if( !first.get().substring(index + 1, index + 3).equalsIgnoreCase( second.get().substring(index + 1, index + 3) ) ) {
1868 return false;
1869 }
1870 index += 3;
1871 previndex = index;
1872 }
1873 return first.get().substring(previndex).equals( second.get().substring(previndex) );
1874 }
1875
1876 /*
1877 * Form a string from the components of this Uri, similarly to the
1878 * toString() method. But this method converts scheme and host to lowercase,
1879 * and converts escaped octets to lowercase.
1880 */
1881 private String getHashString() {
1882 final StringBuilder result = new StringBuilder();
1883 if (scheme != null) {
1884 result.append(scheme.get().toLowerCase());
1885 result.append(SCHEME_SEPARATOR);
1886 }
1887 if (opaque) {
1888 result.append(schemeSpecificPart.get());
1889 } else {
1890 if (authority != null) {
1891 result.append("//");
1892 if (host == null) {
1893 result.append(authority.get());
1894 } else {
1895 if (userInfo != null) {
1896 result.append(userInfo.get() + "@");
1897 }
1898 result.append(host.get().toLowerCase());
1899 if (port != -1) {
1900 result.append(SCHEME_SEPARATOR + port);
1901 }
1902 }
1903 }
1904
1905 if (path != null) {
1906 result.append(path.get());
1907 }
1908
1909 if (query != null) {
1910 result.append(QUERY_SEPARATOR);
1911 result.append(query.get());
1912 }
1913 }
1914
1915 if (fragment != null) {
1916 result.append(FRAGMENT_SEPARATOR);
1917 result.append(fragment.get());
1918 }
1919 return convertHexToLowerCase(result.toString());
1920 }
1921
1922 /**
1923 * Parses the encoded {@code input} into fragment, scheme and scheme-specific-part and, for a hierarchical Uri, further into query, authority (userinfo, host, port) and path.
1924 * @param input the encoded uri string to parse, an empty input is rejected
1925 * @param expectServer if {@code true}, an invalid port or host within the authority raises an URISyntaxException; otherwise userInfo, host and port are left undefined in that case
1926 * @param parseHints bit field; if PARSE_HINT_FIX_PATH is set, an illegal path is re-encoded and re-validated instead of being rejected right away
1927 * @throws URISyntaxException if the input is empty or one of its components fails validation
 * @throws IllegalArgumentException if validateEncoded encounters an incomplete or non-hex '%' escape sequence
1928 */
1929 private Uri(final Encoded input, final boolean expectServer, final int parseHints) throws URISyntaxException {
// NOTE(review): emptyString() is also true for a null input, in which case input.get() below raises a NullPointerException
1930 if( emptyString(input) ) {
1931 throw new URISyntaxException(input.get(), "empty input");
1932 }
1933 String temp = input.get();
1934 int index;
1935 // parse into Fragment, Scheme, and SchemeSpecificPart
1936 // then parse SchemeSpecificPart if necessary
1937
1938 // Fragment
1939 index = temp.indexOf(FRAGMENT_SEPARATOR);
1940 if (index != -1) {
1941 // remove the fragment from the end
1942 fragment = new Encoded( temp.substring(index + 1) );
1943 validateFragment(input, fragment, index + 1);
1944 temp = temp.substring(0, index);
1945 } else {
1946 fragment = null;
1947 }
1948
1949 String inputTemp = input.get(); // may get modified due to error correction
1950
1951 // Scheme and SchemeSpecificPart
1952 final int indexSchemeSep = temp.indexOf(SCHEME_SEPARATOR);
1953 index = indexSchemeSep;
1954 final int indexSSP = temp.indexOf('/');
1955 final int indexQuerySep = temp.indexOf(QUERY_SEPARATOR);
1956
1957 String sspTemp; // may get modified due to error correction
1958
1959 // if a '/' or '?' occurs before the first ':' the uri has no
1960 // specified scheme, and is therefore not absolute
1961 if ( indexSchemeSep != -1 &&
1962 ( indexSSP >= indexSchemeSep || indexSSP == -1 ) &&
1963 ( indexQuerySep >= indexSchemeSep || indexQuerySep == -1 )
1964 ) {
1965 // the characters up to the first ':' comprise the scheme
1966 absolute = true;
1967 scheme = new Encoded( temp.substring(0, indexSchemeSep) );
1968 if (scheme.length() == 0) {
1969 failExpecting(input, "scheme", indexSchemeSep);
1970 }
1971 validateScheme(input, scheme, 0);
1972 sspTemp = temp.substring(indexSchemeSep + 1);
1973 if (sspTemp.length() == 0) {
1974 failExpecting(input, "scheme-specific-part", indexSchemeSep);
1975 }
1976 } else {
1977 absolute = false;
1978 scheme = null;
1979 sspTemp = temp;
1980 }
1981
1982 if ( scheme == null || sspTemp.length() > 0 && sspTemp.charAt(0) == '/' ) {
1983 // Uri is hierarchical, not opaque
1984 opaque = false;
1985
1986 // Query
1987 temp = sspTemp;
1988 index = temp.indexOf(QUERY_SEPARATOR);
1989 if (index != -1) {
1990 query = new Encoded( temp.substring(index + 1) );
1991 temp = temp.substring(0, index);
1992 validateQuery(input, query, indexSSP + 1 + index);
1993 } else {
1994 query = null;
1995 }
1996
1997 String pathTemp; // may get modified due to error correction
1998 final int indexPathInSSP;
1999
2000 // Authority and Path
2001 if (temp.startsWith("//")) {
2002 index = temp.indexOf('/', 2);
2003 final String authorityS;
2004 if (index != -1) {
2005 authorityS = temp.substring(2, index);
2006 pathTemp = temp.substring(index);
2007 indexPathInSSP = index;
2008 } else {
2009 authorityS = temp.substring(2);
2010 if (authorityS.length() == 0 && query == null && fragment == null) {
2011 failExpecting(input, "authority, path [, query, fragment]", index);
2012 }
2013 pathTemp = "";
2014 indexPathInSSP = -1;
2015 // nothing left, so path is empty
2016 // (not null, path should never be null if hierarchical/non-opaque)
2017 }
2018 if ( emptyString(authorityS) ) {
2019 authority = null;
2020 } else {
2021 authority = new Encoded( authorityS );
2022 validateAuthority(input, authority, indexSchemeSep + 3);
2023 }
2024 } else { // no authority specified
2025 pathTemp = temp;
2026 indexPathInSSP = 0;
2027 authority = null;
2028 }
2029
// position of the path within input, used for error reporting and for patching input after a path fix
2030 int indexPath = 0; // in input
2031 if (indexSSP > -1) {
2032 indexPath += indexSSP;
2033 }
2034 if (indexPathInSSP > -1) {
2035 indexPath += indexPathInSSP;
2036 }
2037
2038 final int pathErrIdx = validateEncoded(pathTemp, PATH_LEGAL);
2039 if( 0 <= pathErrIdx ) {
2040 // Perform error correction on PATH if requested!
2041 if( 0 != ( parseHints & PARSE_HINT_FIX_PATH ) ) {
2042 if( DEBUG_SHOWFIX ) {
2043 System.err.println("Uri FIX_FILEPATH: input at index "+(indexPath+pathErrIdx)+": "+inputTemp);
2044 System.err.println("Uri FIX_FILEPATH: ssp at index "+(indexPathInSSP+pathErrIdx)+": "+sspTemp);
2045 System.err.println("Uri FIX_FILEPATH: path at index "+pathErrIdx+": "+pathTemp);
2046 }
2047 final int pathTempOldLen = pathTemp.length();
2048 pathTemp = encode( decode( pathTemp ), PATH_LEGAL); // re-encode, and hope for the best!
2049 validatePath(input, pathTemp, indexPath); // re-validate!
2050 {
2051 // Patch SSP + INPUT !
// i.e. replace the old path by the re-encoded path in both strings, keeping the text before and after it
2052 final StringBuilder sb = new StringBuilder();
2053 if( indexPathInSSP > 0 ) {
2054 sb.append( sspTemp.substring(0, indexPathInSSP) );
2055 }
2056 sb.append( pathTemp ).append( sspTemp.substring( indexPathInSSP + pathTempOldLen ) );
2057 sspTemp = sb.toString(); // update
2058
2059 sb.setLength(0);
2060 if( indexPath > 0 ) {
2061 sb.append( inputTemp.substring(0, indexPath) );
2062 }
2063 sb.append( pathTemp ).append( inputTemp.substring( indexPath + pathTempOldLen ) );
2064 inputTemp = sb.toString(); // update
2065 }
2066 if( DEBUG_SHOWFIX ) {
2067 System.err.println("Uri FIX_FILEPATH: result : "+pathTemp);
2068 System.err.println("Uri FIX_FILEPATH: ssp after : "+sspTemp);
2069 System.err.println("Uri FIX_FILEPATH: input after : "+inputTemp);
2070 }
2071 } else {
2072 fail(input, "invalid path", indexPath+pathErrIdx);
2073 }
2074 }
2075 path = new Encoded( pathTemp );
2076 } else {
2077 // Uri is not hierarchical, Uri is opaque
2078 opaque = true;
2079 query = null;
2080 path = null;
2081 authority = null;
2082 validateSsp(input, sspTemp, indexSchemeSep + 1);
2083 }
2084 schemeSpecificPart = new Encoded( sspTemp );
// Reference comparison on purpose: inputTemp is only re-assigned by the path fix above.
// NOTE(review): relies on input.get() returning the same String instance on each call - confirm
2085 this.input = inputTemp == input.get() ? input : new Encoded( inputTemp );
2086
2087 /**
2088 * determine the host, port and userinfo if the authority parses
2089 * successfully to a server based authority
2090 *
2091 * Behavior in error cases: if expectServer is true, throw
2092 * URISyntaxException with the proper diagnostic messages. if
2093 * expectServer is false assume this is a registry based uri, and
2094 * leave the host, port and userinfo fields undefined.
2095 *
2096 * and there are some error cases where URISyntaxException is thrown
2097 * regardless of the expectServer parameter e.g. malformed ipv6 address
2098 */
2099 Encoded tempUserinfo = null, tempHost = null;
2100 int tempPort = -1;
2101 boolean authorityComplete;
2102
2103 if ( null != authority ) {
2104 authorityComplete = true; // set to false later
2105 int hostindex = 0;
2106
2107 temp = authority.get();
2108 index = temp.indexOf('@');
2109 if (index != -1) {
2110 // remove user info
2111 tempUserinfo = new Encoded( temp.substring(0, index) );
2112 validateUserinfo(authority, tempUserinfo, 0);
2113 temp = temp.substring(index + 1); // host[:port] is left
2114 hostindex = index + 1;
2115 }
2116
2117 index = temp.lastIndexOf(SCHEME_SEPARATOR);
2118 final int endindex = temp.indexOf(']');
2119
// only a ':' located behind a ']' (if any) separates host and port, a ':' in front of ']' belongs to the host
2120 if (index != -1 && endindex < index) {
2121 // determine port and host
2122 tempHost = new Encoded( temp.substring(0, index) );
2123
2124 if (index < (temp.length() - 1)) { // port part is not empty
2125 try {
2126 tempPort = Integer.parseInt(temp.substring(index + 1));
2127 if (tempPort < 0) {
2128 if (expectServer) {
2129 fail(authority, "invalid port <"+authority+">", hostindex + index + 1);
2130 }
2131 authorityComplete = false;
2132 }
2133 } catch (final NumberFormatException e) {
2134 if (expectServer) {
2135 fail(authority, "invalid port <"+authority+">, "+e.getMessage(), hostindex + index + 1);
2136 }
2137 authorityComplete = false;
2138 }
2139 }
2140 } else {
2141 tempHost = new Encoded( temp );
2142 }
2143
2144 if( authorityComplete ) {
2145 if ( emptyString(tempHost) ) {
2146 if (expectServer) {
2147 fail(authority, "empty host <"+authority+">", hostindex);
2148 }
2149 authorityComplete = false;
2150 } else if (!isValidHost(expectServer, tempHost)) {
2151 if (expectServer) {
2152 fail(authority, "invalid host <"+tempHost+">", hostindex);
2153 }
2154 authorityComplete = false;
2155 }
2156 }
2157 } else {
2158 authorityComplete = false;
2159 }
2160
2161 if( authorityComplete ) {
2162 // this is a server based uri,
2163 // fill in the userinfo, host and port fields
2164 userInfo = tempUserinfo;
2165 host = tempHost;
2166 port = tempPort;
2167 hasAuthority = true;
2168 } else {
// no authority at all, or an authority which is not server based
2169 userInfo = null;
2170 host = null;
2171 port = -1;
2172 hasAuthority = false;
2173 }
2174 }
2175
2176 private static void validateScheme(final Encoded uri, final Encoded scheme, final int index) throws URISyntaxException {
2177 // first char needs to be an alpha char
2178 final char ch = scheme.charAt(0);
2179 if ( !((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) ) {
2180 fail(uri, "invalid scheme", index);
2181 }
2182 final int errIdx = validateAlphaNum(scheme.get(), "+-.");
2183 if( 0 <= errIdx ) {
2184 fail(uri, "invalid scheme", index+errIdx);
2185 }
2186 }
2187
2188 private static void validateSsp(final Encoded uri, final String ssp, final int index) throws URISyntaxException {
2189 final int errIdx = validateEncoded(ssp, SSP_LEGAL);
2190 if( 0 <= errIdx ) {
2191 fail(uri, "invalid scheme-specific-part", index+errIdx);
2192 }
2193 }
2194
2195 private static void validateAuthority(final Encoded uri, final Encoded authority, final int index) throws URISyntaxException {
2196 final int errIdx = validateEncoded(authority.get(), AUTHORITY_LEGAL);
2197 if( 0 <= errIdx ) {
2198 fail(uri, "invalid authority", index+errIdx);
2199 }
2200 }
2201
2202 private static void validatePath(final Encoded uri, final String path, final int index) throws URISyntaxException {
2203 final int errIdx = validateEncoded(path, PATH_LEGAL);
2204 if( 0 <= errIdx ) {
2205 fail(uri, "invalid path", index+errIdx);
2206 }
2207 }
2208
2209 private static void validateQuery(final Encoded uri, final Encoded query, final int index) throws URISyntaxException {
2210 final int errIdx = validateEncoded(query.get(), QUERY_LEGAL);
2211 if( 0 <= errIdx ) {
2212 fail(uri, "invalid query", index+errIdx);
2213 }
2214 }
2215
2216 private static void validateFragment(final Encoded uri, final Encoded fragment, final int index) throws URISyntaxException {
2217 final int errIdx = validateEncoded(fragment.get(), FRAG_LEGAL);
2218 if( 0 <= errIdx ) {
2219 fail(uri, "invalid fragment", index+errIdx);
2220 }
2221 }
2222
2223 private static void validateUserinfo(final Encoded uri, final Encoded userinfo, final int index) throws URISyntaxException {
2224 for (int i = 0; i < userinfo.length(); i++) {
2225 final char ch = userinfo.charAt(i);
2226 if (ch == ']' || ch == '[') {
2227 fail(uri, "invalid userinfo", index+i);
2228 }
2229 }
2230 }
2231
2232 /**
2233 * distinguish between IPv4, IPv6, domain name and validate it based on
2234 * its type
2235 */
2236 private boolean isValidHost(final boolean expectServer, final Encoded host) throws URISyntaxException {
2237 if (host.charAt(0) == '[') {
2238 // ipv6 address
2239 if (host.charAt(host.length() - 1) != ']') {
2240 fail(input, "invalid host, missing closing ipv6: "+host, 0);
2241 }
2242 if (!isValidIP6Address(host.get())) {
2243 fail(input, "invalid ipv6: "+host, 0);
2244 }
2245 return true;
2246 }
2247
2248 // '[' and ']' can only be the first char and last char
2249 // of the host name
2250 if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
2251 fail(input, "invalid host: "+host, 0);
2252 }
2253
2254 final int index = host.lastIndexOf('.');
2255 if ( index < 0 || index == host.length() - 1 ||
2256 !Character.isDigit(host.charAt(index + 1)) )
2257 {
2258 // domain name
2259 if (isValidDomainName(host)) {
2260 return true;
2261 }
2262 if (expectServer) {
2263 fail(input, "invalid host, invalid domain-name or ipv4: "+host, 0);
2264 }
2265 return false;
2266 }
2267
2268 // IPv4 address
2269 if (isValidIPv4Address(host.get())) {
2270 return true;
2271 }
2272 if (expectServer) {
2273 fail(input, "invalid host, invalid ipv4: "+host, 0);
2274 }
2275 return false;
2276 }
2277
2278 private static boolean isValidDomainName(final Encoded host) {
2279 final String hostS = host.get();
2280 if( 0 <= validateAlphaNum(hostS, "-.") ) {
2281 return false;
2282 }
2283 String label = null;
2284 final StringTokenizer st = new StringTokenizer(hostS, ".");
2285 while (st.hasMoreTokens()) {
2286 label = st.nextToken();
2287 if (label.startsWith("-") || label.endsWith("-")) {
2288 return false;
2289 }
2290 }
2291
2292 if (!label.equals(hostS)) {
2293 final char ch = label.charAt(0);
2294 if (ch >= '0' && ch <= '9') {
2295 return false;
2296 }
2297 }
2298 return true;
2299 }
2300
2301 private static boolean isValidIPv4Address(final String ipv4Address) {
2302 int index;
2303 int index2;
2304 try {
2305 int num;
2306 index = ipv4Address.indexOf('.');
2307 num = Integer.parseInt(ipv4Address.substring(0, index));
2308 if (num < 0 || num > 255) {
2309 return false;
2310 }
2311 index2 = ipv4Address.indexOf('.', index + 1);
2312 num = Integer.parseInt(ipv4Address.substring(index + 1, index2));
2313 if (num < 0 || num > 255) {
2314 return false;
2315 }
2316 index = ipv4Address.indexOf('.', index2 + 1);
2317 num = Integer.parseInt(ipv4Address.substring(index2 + 1, index));
2318 if (num < 0 || num > 255) {
2319 return false;
2320 }
2321 num = Integer.parseInt(ipv4Address.substring(index + 1));
2322 if (num < 0 || num > 255) {
2323 return false;
2324 }
2325 } catch (final Exception e) {
2326 return false;
2327 }
2328 return true;
2329 }
2330
2331 private static boolean isValidIP6Address(final String ipv6Address) {
2332 final int length = ipv6Address.length();
2333 boolean doubleColon = false;
2334 int numberOfColons = 0;
2335 int numberOfPeriods = 0;
2336 String word = "";
2337 char c = 0;
2338 char prevChar = 0;
2339 int offset = 0; // offset for [] ip addresses
2340
2341 if (length < 2) {
2342 return false;
2343 }
2344
2345 for (int i = 0; i < length; i++) {
2346 prevChar = c;
2347 c = ipv6Address.charAt(i);
2348 switch (c) {
2349
2350 // case for an open bracket [x:x:x:...x]
2351 case '[':
2352 if (i != 0) {
2353 return false; // must be first character
2354 }
2355 if (ipv6Address.charAt(length - 1) != ']') {
2356 return false; // must have a close ]
2357 }
2358 if ((ipv6Address.charAt(1) == SCHEME_SEPARATOR)
2359 && (ipv6Address.charAt(2) != SCHEME_SEPARATOR)) {
2360 return false;
2361 }
2362 offset = 1;
2363 if (length < 4) {
2364 return false;
2365 }
2366 break;
2367
2368 // case for a closed bracket at end of IP [x:x:x:...x]
2369 case ']':
2370 if (i != length - 1) {
2371 return false; // must be last character
2372 }
2373 if (ipv6Address.charAt(0) != '[') {
2374 return false; // must have a open [
2375 }
2376 break;
2377
2378 // case for the last 32-bits represented as IPv4
2379 // x:x:x:x:x:x:d.d.d.d
2380 case '.':
2381 numberOfPeriods++;
2382 if (numberOfPeriods > 3) {
2383 return false;
2384 }
2385 if (!isValidIP4Word(word)) {
2386 return false;
2387 }
2388 if (numberOfColons != 6 && !doubleColon) {
2389 return false;
2390 }
2391 // a special case ::1:2:3:4:5:d.d.d.d allows 7 colons
2392 // with
2393 // an IPv4 ending, otherwise 7 :'s is bad
2394 if (numberOfColons == 7
2395 && ipv6Address.charAt(0 + offset) != SCHEME_SEPARATOR
2396 && ipv6Address.charAt(1 + offset) != SCHEME_SEPARATOR) {
2397 return false;
2398 }
2399 word = "";
2400 break;
2401
2402 case SCHEME_SEPARATOR:
2403 numberOfColons++;
2404 if (numberOfColons > 7) {
2405 return false;
2406 }
2407 if (numberOfPeriods > 0) {
2408 return false;
2409 }
2410 if (prevChar == SCHEME_SEPARATOR) {
2411 if (doubleColon) {
2412 return false;
2413 }
2414 doubleColon = true;
2415 }
2416 word = "";
2417 break;
2418
2419 default:
2420 if (word.length() > 3) {
2421 return false;
2422 }
2423 if (!isValidHexChar(c)) {
2424 return false;
2425 }
2426 word += c;
2427 }
2428 }
2429
2430 // Check if we have an IPv4 ending
2431 if (numberOfPeriods > 0) {
2432 if (numberOfPeriods != 3 || !isValidIP4Word(word)) {
2433 return false;
2434 }
2435 } else {
2436 // If we're at then end and we haven't had 7 colons then there
2437 // is a problem unless we encountered a doubleColon
2438 if (numberOfColons != 7 && !doubleColon) {
2439 return false;
2440 }
2441
2442 // If we have an empty word at the end, it means we ended in
2443 // either a : or a .
2444 // If we did not end in :: then this is invalid
2445 if (word == "" && ipv6Address.charAt(length - 1 - offset) != SCHEME_SEPARATOR
2446 && ipv6Address.charAt(length - 2 - offset) != SCHEME_SEPARATOR) {
2447 return false;
2448 }
2449 }
2450
2451 return true;
2452 }
2453
2454 private static boolean isValidIP4Word(final String word) {
2455 char c;
2456 if (word.length() < 1 || word.length() > 3) {
2457 return false;
2458 }
2459 for (int i = 0; i < word.length(); i++) {
2460 c = word.charAt(i);
2461 if (!(c >= '0' && c <= '9')) {
2462 return false;
2463 }
2464 }
2465 if (Integer.parseInt(word) > 255) {
2466 return false;
2467 }
2468 return true;
2469 }
2470
2471 /**
2472 * Validate a string by checking if it contains any characters other than:
2473 * <ol>
2474 * <li>letters ('a'..'z', 'A'..'Z')</li>
2475 * <li>numbers ('0'..'9')</li>
2476 * <li>characters in the legal-set parameter</li>
2477 * <li> others (unicode characters that are not in
2478 * US-ASCII set, and are not ISO Control or are not ISO Space characters)</li>
2479 * </ol>
2480 *
2481 * @param encoded
2482 * {@code java.lang.String} the string to be validated
2483 * @param legal
2484 * {@code java.lang.String} the characters allowed in the String
2485 * s
2486 */
2487 private static int validateEncoded(final String encoded, final String legal) {
2488 for (int i = 0; i < encoded.length();) {
2489 final char ch = encoded.charAt(i);
2490 if (ch == '%') {
2491 do {
2492 if (i + 2 >= encoded.length()) {
2493 throw new IllegalArgumentException("missing '%' hex-digits at index "+i);
2494 }
2495 final int d1 = Character.digit(encoded.charAt(i + 1), 16);
2496 final int d2 = Character.digit(encoded.charAt(i + 2), 16);
2497 if (d1 == -1 || d2 == -1) {
2498 throw new IllegalArgumentException("invalid hex-digits at index "+i+": "+encoded.substring(i, i + 3));
2499 }
2500 i += 3;
2501 } while (i < encoded.length() && encoded.charAt(i) == '%');
2502 continue;
2503 }
2504 if ( !( (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
2505 (ch >= '0' && ch <= '9') || legal.indexOf(ch) > -1 ||
2506 (ch > 127 && !Character.isSpaceChar(ch) && !Character.isISOControl(ch))
2507 )
2508 ) {
2509 return i;
2510 }
2511 i++;
2512 }
2513 return -1;
2514 }
2515 private static int validateAlphaNum(final String s, final String legal) {
2516 for (int i = 0; i < s.length();) {
2517 final char ch = s.charAt(i);
2518 if ( !( (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
2519 (ch >= '0' && ch <= '9') || legal.indexOf(ch) > -1
2520 )
2521 ) {
2522 return i;
2523 }
2524 i++;
2525 }
2526 return -1;
2527 }
2528
2529 private static boolean isValidHexChar(final char c) {
2530 return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
2531 }
2532 private static boolean emptyString(final Encoded s) {
2533 return null == s || 0 == s.length();
2534 }
2535 private static boolean emptyString(final String s) {
2536 return null == s || 0 == s.length();
2537 }
2538
2539 private static void fail(final Encoded input, final String reason, final int p) throws URISyntaxException {
2540 throw new URISyntaxException(input.get(), reason, p);
2541 }
2542 private static void failExpecting(final Encoded input, final String expected, final int p) throws URISyntaxException {
2543 fail(input, "Expecting " + expected, p);
2544 }
2545
2546 static class Util {
2547 private static final Pattern patternSingleBS = Pattern.compile("\\\\{1}");
2548 /**
2549 *
2550 * @param path
2551 * @param startWithSlash
2552 * @param endWithSlash
2553 * @return
2554 * @throws URISyntaxException if path is empty or has no parent directory available while resolving <code>../</code>
2555 */
2556 public static String slashify(final String path, final boolean startWithSlash, final boolean endWithSlash) throws URISyntaxException {
2557 String p = patternSingleBS.matcher(path).replaceAll("/");
2558 if (startWithSlash && !p.startsWith("/")) {
2559 p = "/" + p;
2560 }
2561 if (endWithSlash && !p.endsWith("/")) {
2562 p = p + "/";
2563 }
2564 return cleanPathString(p);
2565 }
2566 /**
2567 * @param path assuming a slashified path, either denotes a file or directory, either relative or absolute.
2568 * @return parent of path
2569 * @throws URISyntaxException if path is empty or has no parent directory available
2570 */
2571 public static String getParentOf(final String path) throws URISyntaxException {
2572 final int pl = null!=path ? path.length() : 0;
2573 if(pl == 0) {
2574 throw new IllegalArgumentException("path is empty <"+path+">");
2575 }
2576
2577 final int e = path.lastIndexOf("/");
2578 if( e < 0 ) {
2579 throw new URISyntaxException(path, "path contains no '/': <"+path+">");
2580 }
2581 if( e == 0 ) {
2582 // path is root directory
2583 throw new URISyntaxException(path, "path has no parents: <"+path+">");
2584 }
2585 if( e < pl - 1 ) {
2586 // path is file, return it's parent directory
2587 return path.substring(0, e+1);
2588 }
2589 final int j = path.lastIndexOf("!") + 1; // '!' Separates JARFile entry -> local start of path
2590 // path is a directory ..
2591 final int p = path.lastIndexOf("/", e-1);
2592 if( p >= j) {
2593 // parent itself has '/' - post '!' or no '!' at all
2594 return path.substring(0, p+1);
2595 } else {
2596 // parent itself has no '/'
2597 final String parent = path.substring(j, e);
2598 if( parent.equals("..") ) {
2599 throw new URISyntaxException(path, "parent is unresolved: <"+path+">");
2600 } else {
2601 // parent is '!' or empty (relative path)
2602 return path.substring(0, j);
2603 }
2604 }
2605 }
2606
2607 /**
2608 * @param path assuming a slashified path, either denoting a file or directory, either relative or absolute.
2609 * @return clean path string where {@code ./} and {@code ../} is resolved,
2610 * while keeping a starting {@code ../} at the beginning of a relative path.
2611 * @throws URISyntaxException if path is empty or has no parent directory available while resolving <code>../</code>
2612 */
2613 public static String cleanPathString(String path) throws URISyntaxException {
2614 // Resolve './' before '../' to handle case 'parent/./../a.txt' properly.
2615 int idx = path.length() - 1;
2616 while ( idx >= 1 && ( idx = path.lastIndexOf("./", idx) ) >= 0 ) {
2617 if( 0 < idx && path.charAt(idx-1) == '.' ) {
2618 idx-=2; // skip '../' -> idx upfront
2619 } else {
2620 path = path.substring(0, idx) + path.substring(idx+2);
2621 idx--; // idx upfront
2622 }
2623 }
2624 idx = 0;
2625 while ( ( idx = path.indexOf("../", idx) ) >= 0 ) {
2626 if( 0 == idx ) {
2627 idx += 3; // skip starting '../'
2628 } else {
2629 path = getParentOf(path.substring(0, idx)) + path.substring(idx+3);
2630 idx = 0;
2631 }
2632 }
2633 return path;
2634 }
2635 }
2636}
static ASCIIEncoded cast(final String encoded)
Casts the given encoded String by creating a new ASCIIEncoded instance.
Definition: Uri.java:461
ASCIIEncoded(final String unicode)
Other characters, which are Unicode chars that are not US-ASCII, and are not ISO Control or are not I...
Definition: Uri.java:480
Immutable RFC3986 encoded string.
Definition: Uri.java:301
final int indexOf(final String str, final int fromIndex)
See String#indexOf(String, int).
Definition: Uri.java:432
final int length()
Definition: Uri.java:403
final int indexOf(final String str)
See String#indexOf(String).
Definition: Uri.java:430
Encoded(final String vanilla, final String legal)
Encodes all characters into their hexadecimal value prepended by '%', except:
Definition: Uri.java:334
final int hashCode()
Definition: Uri.java:374
boolean startsWith(final String prefix)
See String#startsWith(String).
Definition: Uri.java:444
static Encoded cast(final String encoded)
Casts the given encoded String by creating a new Encoded instance.
Definition: Uri.java:310
final String get()
Returns the encoded String.
Definition: Uri.java:341
int lastIndexOf(final String str)
See String#lastIndexOf(String).
Definition: Uri.java:439
final CharSequence subSequence(final int start, final int end)
Definition: Uri.java:409
Encoded concat(final Encoded encoded)
See String#concat(String).
Definition: Uri.java:418
final int lastIndexOf(final int ch)
See String#lastIndexOf(int).
Definition: Uri.java:435
final int indexOf(final int ch, final int fromIndex)
See String#indexOf(int, int).
Definition: Uri.java:428
final int indexOf(final int ch)
See String#indexOf(int).
Definition: Uri.java:426
int lastIndexOf(final int ch, final int fromIndex)
See String#lastIndexOf(int, int).
Definition: Uri.java:437
int lastIndexOf(final String str, final int fromIndex)
See String#lastIndexOf(String, int).
Definition: Uri.java:441
boolean isASCII()
Definition: Uri.java:338
final String toString()
Definition: Uri.java:371
boolean startsWith(final String prefix, final int toffset)
See String#startsWith(String, int).
Definition: Uri.java:446
final boolean equalsIgnoreCase(final Encoded anotherEncoded)
See String#equalsIgnoreCase(String).
Definition: Uri.java:451
boolean endsWith(final String suffix)
See String#endsWith(String).
Definition: Uri.java:448
final Encoded substring(final int start, final int end)
See String#substring(int, int).
Definition: Uri.java:423
final int compareTo(final Encoded o)
Definition: Uri.java:412
final boolean equals(final Object o)
Definition: Uri.java:388
final Encoded substring(final int start)
See String#substring(int).
Definition: Uri.java:421
final char charAt(final int index)
Definition: Uri.java:406
final String decode()
Decodes the string argument which is assumed to be encoded in the x-www-form-urlencoded MIME conten...
Definition: Uri.java:358
This class implements an immutable Uri as defined by RFC 2396.
Definition: Uri.java:162
Uri getDirectory()
Returns this Uri's directory Uri.
Definition: Uri.java:1599
static final String PATH_LEGAL
Valid charset for RFC 2396 path, additional to legal alphanum characters.
Definition: Uri.java:248
final java.net.URI toURI()
Returns a new URI instance using the encoded input string, new URI(uri.input), i.e.
Definition: Uri.java:1285
final Encoded scheme
Encoded scheme, null if undefined.
Definition: Uri.java:1191
final int port
Encoded port part of authority and scheme-specific-part, -1 if undefined.
Definition: Uri.java:1207
final String toString()
Returns the encoded input as String, never null, same as getEncoded().
Definition: Uri.java:1263
final Uri concat(final Encoded suffix)
Concatenates the given encoded string to the encoded uri of this instance and returns a new Uri insta...
Definition: Uri.java:1702
static Uri create(final Encoded scheme, final Encoded ssp, final Encoded fragment)
Creates a new Uri instance using the given encoded arguments.
Definition: Uri.java:706
final boolean isFileScheme()
Returns true, if this instance is a file scheme, otherwise false.
Definition: Uri.java:1240
static final String RESERVED
RFC 3986 section 2.2 Reserved Characters (January 2005)
Definition: Uri.java:213
final String getUriFilePathOrASCII()
If this uri is a file scheme implementation returns toFile().
Definition: Uri.java:1381
final Encoded getEncoded()
Returns the encoded input, never null.
Definition: Uri.java:1255
final Encoded host
Encoded host part of authority and scheme-specific-part, null if undefined.
Definition: Uri.java:1205
static Uri valueOf(final java.net.URI uri)
Creates a new Uri instance using the given URI instance.
Definition: Uri.java:1144
final Encoded authority
Encoded authority part of scheme-specific-part, null if undefined.
Definition: Uri.java:1201
final Uri getParent()
Returns this Uri's parent directory Uri.
Definition: Uri.java:1641
static String decode(final Encoded encoded)
Safe Encoded#decode() call on optional encoded instance.
Definition: Uri.java:577
static final String QUERY_LEGAL
Valid charset for RFC 2396 query, additional to legal alphanum characters.
Definition: Uri.java:258
ASCIIEncoded toASCIIString()
Returns the encoded input encoded in US-ASCII.
Definition: Uri.java:1270
final java.net.URL toURL()
Returns a new URL instance using the encoded input string, new URL(uri.input), i.e.
Definition: Uri.java:1331
static final String RESERVED_2
Definition: Uri.java:216
final Encoded schemeSpecificPart
Encoded scheme-specific-part, never null.
Definition: Uri.java:1194
final Uri getNewQuery(final Encoded newQuery)
Returns a new Uri instance w/ the given new query newQuery.
Definition: Uri.java:1717
static final String HTTP_SCHEME
The "http" scheme name.
Definition: Uri.java:290
final boolean isJarScheme()
Returns true, if this instance is a jar scheme, otherwise false.
Definition: Uri.java:1248
static final char QUERY_SEPARATOR
The query separator character '?'.
Definition: Uri.java:284
static final String UNRESERVED
RFC 3986 section 2.3 Unreserved Characters (January 2005)
Definition: Uri.java:201
static Uri create(final String scheme, final String host, final String path, final String fragment)
Creates a new Uri instance using the given unencoded arguments.
Definition: Uri.java:912
static String decode(final String encoded)
Decodes the string argument which is assumed to be encoded in the x-www-form-urlencoded MIME content type.
Definition: Uri.java:596
final boolean hasAuthority
Indicating whether authority part is defined or not.
Definition: Uri.java:1199
static Uri cast(final String encodedUri)
Casts the given encoded String to a new Encoded instance used to create the resulting Uri instance via Uri(Encoded).
Definition: Uri.java:1073
Uri(final Encoded uri)
Creates a new Uri instance according to the given encoded string uri.
Definition: Uri.java:1235
final boolean equals(final Object o)
Definition: Uri.java:1740
final Uri getContainedUri()
If this instance's schemeSpecificPart contains a Uri itself, a sub-Uri, return schemeSpecificPart + # fragment as a new Uri instance.
Definition: Uri.java:1414
final Encoded userInfo
Encoded userinfo part of authority and scheme-specific-part, null if undefined.
Definition: Uri.java:1203
static Uri valueOfFilepath(final String path)
Creates a new Uri instance using the given file-path argument.
Definition: Uri.java:1092
final Encoded fragment
Encoded fragment, null if undefined.
Definition: Uri.java:1213
static Uri create(final Encoded scheme, final Encoded userinfo, final Encoded host, final int port, final Encoded path, final Encoded query, final Encoded fragment)
Creates a new Uri instance using the given encoded arguments.
Definition: Uri.java:840
static final String FRAG_LEGAL
Valid charset for RFC 2396 fragment, additional to legal alphanum characters.
Definition: Uri.java:278
static final String SSP_LEGAL
Valid charset for RFC 2396 scheme-specific-part, additional to legal alphanum characters.
Definition: Uri.java:268
static Uri valueOf(final java.net.URL url)
Creates a new Uri instance using the given URL instance, convenient wrapper for valueOf(URI) and URL#toURI().
Definition: Uri.java:1172
static Uri valueOf(final File file)
Creates a new Uri instance using the given File instance.
Definition: Uri.java:1126
static final String FILE_SCHEME
The "file" scheme name.
Definition: Uri.java:288
static Uri create(final String scheme, final String userinfo, String host, final int port, final String path, final String query, final String fragment)
Creates a new Uri instance using the given unencoded arguments.
Definition: Uri.java:750
static final String JAR_SCHEME
The "jar" scheme name.
Definition: Uri.java:294
final int hashCode()
Definition: Uri.java:1822
final File toFile()
If this instance is a file scheme, implementation decodes [ "//"+authority ] + path,...
Definition: Uri.java:1352
static final String AUTHORITY_LEGAL
Valid charset for RFC 2396 authority, additional to legal alphanum characters.
Definition: Uri.java:239
static Uri create(final Encoded scheme, final Encoded host, final Encoded path, final Encoded fragment)
Creates a new Uri instance using the given encoded arguments.
Definition: Uri.java:941
final java.net.URI toURIReencoded()
Returns a new URI instance based upon this instance.
Definition: Uri.java:1306
static final String USERINFO_LEGAL
Valid charset for RFC 2396 authority's user-info, additional to legal alphanum characters.
Definition: Uri.java:229
final Encoded input
Encoded input string used at construction, never null.
Definition: Uri.java:1181
final Encoded query
Encoded query part of scheme-specific-part, null if undefined.
Definition: Uri.java:1210
static String encode(final String vanilla, final String legal)
All characters are encoded into their hexadecimal value prepended by '%', except:
Definition: Uri.java:524
final boolean opaque
Indicating whether this Uri is opaque, i.e. non-hierarchical.
Definition: Uri.java:1225
static final String HTTPS_SCHEME
The "https" scheme name.
Definition: Uri.java:292
static final char FRAGMENT_SEPARATOR
The fragment separator character '#'.
Definition: Uri.java:286
final Encoded path
Encoded path part of scheme-specific-part, never null.
Definition: Uri.java:1196
static Uri create(final Encoded scheme, final Encoded authority, final Encoded path, final Encoded query, final Encoded fragment)
Creates a new Uri instance using the given encoded arguments.
Definition: Uri.java:1032
final boolean absolute
Indicating whether this Uri is absolute, i.e. it has a scheme.
Definition: Uri.java:1216
static Uri create(final String scheme, final String authority, final String path, final String query, final String fragment)
Creates a new Uri instance using the given unencoded arguments.
Definition: Uri.java:968
static Uri create(final String scheme, final String ssp, final String fragment)
Creates a new Uri instance using the given unencoded arguments.
Definition: Uri.java:661
final Uri getNormalized()
Normalizes this Uri's path and returns the normalized form if it differs, otherwise this instance.
Definition: Uri.java:1560
static String encodeToASCIIString(final String unicode)
Other characters, which are Unicode chars that are not US-ASCII, and are not ISO Control or are not ISO Space chars, are not preserved and are converted into their hexadecimal value prepended by '%'.
Definition: Uri.java:559
static final char JAR_SCHEME_SEPARATOR
A JAR sub-protocol is separated from the JAR entry with this separator, '!'.
Definition: Uri.java:296
Uri getRelativeOf(final Encoded appendPath)
Returns a new Uri appending the given appendPath to this instance's directory.
Definition: Uri.java:1686
static final char SCHEME_SEPARATOR
The scheme separator character ':'.
Definition: Uri.java:282