001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.fileupload2.core; 018 019import java.io.ByteArrayInputStream; 020import java.io.IOException; 021import java.io.InputStream; 022import java.io.OutputStream; 023import java.nio.charset.Charset; 024import java.nio.charset.StandardCharsets; 025import java.nio.file.CopyOption; 026import java.nio.file.Files; 027import java.nio.file.InvalidPathException; 028import java.nio.file.Path; 029import java.nio.file.Paths; 030import java.nio.file.StandardCopyOption; 031import java.util.UUID; 032import java.util.concurrent.atomic.AtomicInteger; 033 034import org.apache.commons.fileupload2.core.FileItemFactory.AbstractFileItemBuilder; 035import org.apache.commons.io.Charsets; 036import org.apache.commons.io.build.AbstractOrigin; 037import org.apache.commons.io.file.PathUtils; 038import org.apache.commons.io.output.DeferredFileOutputStream; 039 040/** 041 * The default implementation of the {@link FileItem FileItem} interface. 042 * <p> 043 * After retrieving an instance of this class from a {@link DiskFileItemFactory} instance (see 044 * {@code org.apache.commons.fileupload2.core.servlet.ServletFileUpload 045 * #parseRequest(javax.servlet.http.HttpServletRequest)}), you may either request all contents of file at once using {@link #get()} or request an 046 * {@link java.io.InputStream InputStream} with {@link #getInputStream()} and process the file without attempting to load it into memory, which may come handy 047 * with large files. 048 * </p> 049 * <p> 050 * Temporary files, which are created for file items, should be deleted later on. The best way to do this is using a 051 * {@link org.apache.commons.io.FileCleaningTracker}, which you can set on the {@link DiskFileItemFactory}. However, if you do use such a tracker, then you must 052 * consider the following: Temporary files are automatically deleted as soon as they are no longer needed. (More precisely, when the corresponding instance of 053 * {@link java.io.File} is garbage collected.) This is done by the so-called reaper thread, which is started and stopped automatically by the 054 * {@link org.apache.commons.io.FileCleaningTracker} when there are files to be tracked. It might make sense to terminate that thread, for example, if your web 055 * application ends. See the section on "Resource cleanup" in the users guide of Commons FileUpload. 056 * </p> 057 */ 058public final class DiskFileItem implements FileItem<DiskFileItem> { 059 060 /** 061 * Builds a new {@link DiskFileItem} instance. 062 * <p> 063 * For example: 064 * </p> 065 * 066 * <pre>{@code 067 * final FileItem fileItem = fileItemFactory.fileItemBuilder() 068 * .setFieldName("FieldName") 069 * .setContentType("ContentType") 070 * .setFormField(true) 071 * .setFileName("FileName") 072 * .setFileItemHeaders(...) 073 * .get(); 074 * } 075 * </pre> 076 */ 077 public static class Builder extends AbstractFileItemBuilder<DiskFileItem, Builder> { 078 079 /** 080 * Constructs a new instance. 081 */ 082 public Builder() { 083 setBufferSize(DiskFileItemFactory.DEFAULT_THRESHOLD); 084 setPath(PathUtils.getTempDirectory()); 085 setCharset(DEFAULT_CHARSET); 086 setCharsetDefault(DEFAULT_CHARSET); 087 } 088 089 /** 090 * Constructs a new instance. 091 * <p> 092 * You must provide an origin that can be converted to a Reader by this builder, otherwise, this call will throw an 093 * {@link UnsupportedOperationException}. 094 * </p> 095 * 096 * @return a new instance. 097 * @throws UnsupportedOperationException if the origin cannot provide a Path. 098 * @see AbstractOrigin#getReader(Charset) 099 */ 100 @Override 101 public DiskFileItem get() { 102 final var diskFileItem = new DiskFileItem(getFieldName(), getContentType(), isFormField(), getFileName(), getBufferSize(), getPath(), 103 getFileItemHeaders(), getCharset()); 104 final var tracker = getFileCleaningTracker(); 105 if (tracker != null) { 106 tracker.track(diskFileItem.getTempFile().toFile(), diskFileItem); 107 } 108 return diskFileItem; 109 } 110 111 } 112 113 /** 114 * Default content charset to be used when no explicit charset parameter is provided by the sender. Media subtypes of the "text" type are defined to have a 115 * default charset value of "ISO-8859-1" when received via HTTP. 116 */ 117 public static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1; 118 119 /** 120 * UID used in unique file name generation. 121 */ 122 private static final String UID = UUID.randomUUID().toString().replace('-', '_'); 123 124 /** 125 * Counter used in unique identifier generation. 126 */ 127 private static final AtomicInteger COUNTER = new AtomicInteger(); 128 129 /** 130 * Constructs a new {@link Builder}. 131 * 132 * @return a new {@link Builder}. 133 */ 134 public static Builder builder() { 135 return new Builder(); 136 } 137 138 /** 139 * Tests if the file name is valid. For example, if it contains a NUL characters, it's invalid. If the file name is valid, it will be returned without any 140 * modifications. Otherwise, throw an {@link InvalidPathException}. 141 * 142 * @param fileName The file name to check 143 * @return Unmodified file name, if valid. 144 * @throws InvalidPathException The file name is invalid. 145 */ 146 public static String checkFileName(final String fileName) { 147 if (fileName != null) { 148 // Specific NUL check to build a better exception message. 149 final var indexOf0 = fileName.indexOf(0); 150 if (indexOf0 != -1) { 151 final var sb = new StringBuilder(); 152 for (var i = 0; i < fileName.length(); i++) { 153 final var c = fileName.charAt(i); 154 switch (c) { 155 case 0: 156 sb.append("\\0"); 157 break; 158 default: 159 sb.append(c); 160 break; 161 } 162 } 163 throw new InvalidPathException(fileName, sb.toString(), indexOf0); 164 } 165 // Throws InvalidPathException on invalid file names 166 Paths.get(fileName); 167 } 168 return fileName; 169 } 170 171 /** 172 * Gets an identifier that is unique within the class loader used to load this class, but does not have random-like appearance. 173 * 174 * @return A String with the non-random looking instance identifier. 175 */ 176 private static String getUniqueId() { 177 final var limit = 100_000_000; 178 final var current = COUNTER.getAndIncrement(); 179 var id = Integer.toString(current); 180 181 // If you manage to get more than 100 million of ids, you'll 182 // start getting ids longer than 8 characters. 183 if (current < limit) { 184 id = ("00000000" + id).substring(id.length()); 185 } 186 return id; 187 } 188 189 /** 190 * The name of the form field as provided by the browser. 191 */ 192 private String fieldName; 193 194 /** 195 * The content type passed by the browser, or {@code null} if not defined. 196 */ 197 private final String contentType; 198 199 /** 200 * Whether or not this item is a simple form field. 201 */ 202 private volatile boolean isFormField; 203 204 /** 205 * The original file name in the user's file system. 206 */ 207 private final String fileName; 208 209 /** 210 * The size of the item, in bytes. This is used to cache the size when a file item is moved from its original location. 211 */ 212 private volatile long size = -1; 213 214 /** 215 * The threshold above which uploads will be stored on disk. 216 */ 217 private final int threshold; 218 219 /** 220 * The directory in which uploaded files will be stored, if stored on disk. 221 */ 222 private final Path repository; 223 224 /** 225 * Cached contents of the file. 226 */ 227 private byte[] cachedContent; 228 229 /** 230 * Output stream for this item. 231 */ 232 private DeferredFileOutputStream dfos; 233 234 /** 235 * The temporary file to use. 236 */ 237 private final Path tempFile; 238 239 /** 240 * The file items headers. 241 */ 242 private FileItemHeaders fileItemHeaders; 243 244 /** 245 * Default content Charset to be used when no explicit Charset parameter is provided by the sender. 246 */ 247 private Charset charsetDefault = DEFAULT_CHARSET; 248 249 /** 250 * Constructs a new {@code DiskFileItem} instance. 251 * 252 * @param fieldName The name of the form field. 253 * @param contentType The content type passed by the browser or {@code null} if not specified. 254 * @param isFormField Whether or not this item is a plain form field, as opposed to a file upload. 255 * @param fileName The original file name in the user's file system, or {@code null} if not specified. 256 * @param threshold The threshold, in bytes, below which items will be retained in memory and above which they will be stored as a file. 257 * @param repository The data repository, which is the directory in which files will be created, should the item size exceed the threshold. 258 * @param fileItemHeaders The file item headers. 259 * @param defaultCharset The default Charset. 260 */ 261 private DiskFileItem(final String fieldName, final String contentType, final boolean isFormField, final String fileName, final int threshold, 262 final Path repository, final FileItemHeaders fileItemHeaders, final Charset defaultCharset) { 263 this.fieldName = fieldName; 264 this.contentType = contentType; 265 this.charsetDefault = defaultCharset; 266 this.isFormField = isFormField; 267 this.fileName = fileName; 268 this.fileItemHeaders = fileItemHeaders; 269 this.threshold = threshold; 270 this.repository = repository != null ? repository : PathUtils.getTempDirectory(); 271 this.tempFile = this.repository.resolve(String.format("upload_%s_%s.tmp", UID, getUniqueId())); 272 } 273 274 /** 275 * Deletes the underlying storage for a file item, including deleting any associated temporary disk file. This method can be used to ensure that this is 276 * done at an earlier time, thus preserving system resources. 277 * 278 * @throws IOException if an error occurs. 279 */ 280 @Override 281 public DiskFileItem delete() throws IOException { 282 cachedContent = null; 283 final var outputFile = getPath(); 284 if (outputFile != null && !isInMemory() && Files.exists(outputFile)) { 285 Files.delete(outputFile); 286 } 287 return this; 288 } 289 290 /** 291 * Gets the contents of the file as an array of bytes. If the contents of the file were not yet cached in memory, they will be loaded from the disk storage 292 * and cached. 293 * 294 * @return The contents of the file as an array of bytes or {@code null} if the data cannot be read. 295 * @throws IOException if an I/O error occurs. 296 * @throws OutOfMemoryError See {@link Files#readAllBytes(Path)}: If an array of the required size cannot be allocated, for example the file is larger 297 * that {@code 2GB} 298 */ 299 @Override 300 public byte[] get() throws IOException { 301 if (isInMemory()) { 302 if (cachedContent == null && dfos != null) { 303 cachedContent = dfos.getData(); 304 } 305 return cachedContent != null ? cachedContent.clone() : new byte[0]; 306 } 307 return Files.readAllBytes(dfos.getFile().toPath()); 308 } 309 310 /** 311 * Gets the content charset passed by the agent or {@code null} if not defined. 312 * 313 * @return The content charset passed by the agent or {@code null} if not defined. 314 */ 315 public Charset getCharset() { 316 final var parser = new ParameterParser(); 317 parser.setLowerCaseNames(true); 318 // Parameter parser can handle null input 319 final var params = parser.parse(getContentType(), ';'); 320 return Charsets.toCharset(params.get("charset"), charsetDefault); 321 } 322 323 /** 324 * Gets the default charset for use when no explicit charset parameter is provided by the sender. 325 * 326 * @return the default charset 327 */ 328 public Charset getCharsetDefault() { 329 return charsetDefault; 330 } 331 332 /** 333 * Gets the content type passed by the agent or {@code null} if not defined. 334 * 335 * @return The content type passed by the agent or {@code null} if not defined. 336 */ 337 @Override 338 public String getContentType() { 339 return contentType; 340 } 341 342 /** 343 * Gets the name of the field in the multipart form corresponding to this file item. 344 * 345 * @return The name of the form field. 346 * @see #setFieldName(String) 347 */ 348 @Override 349 public String getFieldName() { 350 return fieldName; 351 } 352 353 /** 354 * Gets the file item headers. 355 * 356 * @return The file items headers. 357 */ 358 @Override 359 public FileItemHeaders getHeaders() { 360 return fileItemHeaders; 361 } 362 363 /** 364 * Gets an {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file. 365 * 366 * @return An {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file. 367 * @throws IOException if an error occurs. 368 */ 369 @Override 370 public InputStream getInputStream() throws IOException { 371 if (!isInMemory()) { 372 return Files.newInputStream(dfos.getFile().toPath()); 373 } 374 375 if (cachedContent == null) { 376 cachedContent = dfos.getData(); 377 } 378 return new ByteArrayInputStream(cachedContent); 379 } 380 381 /** 382 * Gets the original file name in the client's file system. 383 * 384 * @return The original file name in the client's file system. 385 * @throws InvalidPathException The file name contains a NUL character, which might be an indicator of a security attack. If you intend to use the file name 386 * anyways, catch the exception and use {@link InvalidPathException#getInput()}. 387 */ 388 @Override 389 public String getName() { 390 return checkFileName(fileName); 391 } 392 393 /** 394 * Gets an {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file. 395 * 396 * @return An {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file. 397 */ 398 @Override 399 public OutputStream getOutputStream() { 400 if (dfos == null) { 401 dfos = DeferredFileOutputStream.builder().setThreshold(threshold).setOutputFile(getTempFile().toFile()).get(); 402 } 403 return dfos; 404 } 405 406 /** 407 * Gets the {@link Path} for the {@code FileItem}'s data's temporary location on the disk. Note that for {@code FileItem}s that have their data stored in 408 * memory, this method will return {@code null}. When handling large files, you can use {@link Files#move(Path,Path,CopyOption...)} to move the file to new 409 * location without copying the data, if the source and destination locations reside within the same logical volume. 410 * 411 * @return The data file, or {@code null} if the data is stored in memory. 412 */ 413 public Path getPath() { 414 if (dfos == null) { 415 return null; 416 } 417 if (isInMemory()) { 418 return null; 419 } 420 return dfos.getFile().toPath(); 421 } 422 423 /** 424 * Gets the size of the file. 425 * 426 * @return The size of the file, in bytes. 427 */ 428 @Override 429 public long getSize() { 430 if (size >= 0) { 431 return size; 432 } 433 if (cachedContent != null) { 434 return cachedContent.length; 435 } 436 return dfos != null ? dfos.getByteCount() : 0; 437 } 438 439 /** 440 * Gets the contents of the file as a String, using the default character encoding. This method uses {@link #get()} to retrieve the contents of the file. 441 * <p> 442 * <strong>TODO</strong> Consider making this method throw UnsupportedEncodingException. 443 * </p> 444 * 445 * @return The contents of the file, as a string. 446 * @throws IOException if an I/O error occurs 447 */ 448 @Override 449 public String getString() throws IOException { 450 return new String(get(), getCharset()); 451 } 452 453 /** 454 * Gets the contents of the file as a String, using the specified encoding. This method uses {@link #get()} to retrieve the contents of the file. 455 * 456 * @param charset The charset to use. 457 * @return The contents of the file, as a string. 458 * @throws IOException if an I/O error occurs 459 */ 460 @Override 461 public String getString(final Charset charset) throws IOException { 462 return new String(get(), Charsets.toCharset(charset, charsetDefault)); 463 } 464 465 /** 466 * Creates and returns a {@link java.io.File File} representing a uniquely named temporary file in the configured repository path. The lifetime of the file 467 * is tied to the lifetime of the {@code FileItem} instance; the file will be deleted when the instance is garbage collected. 468 * <p> 469 * <strong>Note: Subclasses that override this method must ensure that they return the same File each time.</strong> 470 * </p> 471 * 472 * @return The {@link java.io.File File} to be used for temporary storage. 473 */ 474 protected Path getTempFile() { 475 return tempFile; 476 } 477 478 /** 479 * Tests whether or not a {@code FileItem} instance represents a simple form field. 480 * 481 * @return {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file. 482 * @see #setFormField(boolean) 483 */ 484 @Override 485 public boolean isFormField() { 486 return isFormField; 487 } 488 489 /** 490 * Provides a hint as to whether or not the file contents will be read from memory. 491 * 492 * @return {@code true} if the file contents will be read from memory; {@code false} otherwise. 493 */ 494 @Override 495 public boolean isInMemory() { 496 if (cachedContent != null) { 497 return true; 498 } 499 return dfos.isInMemory(); 500 } 501 502 /** 503 * Sets the default charset for use when no explicit charset parameter is provided by the sender. 504 * 505 * @param charset the default charset 506 * @return {@code this} instance. 507 */ 508 public DiskFileItem setCharsetDefault(final Charset charset) { 509 charsetDefault = charset; 510 return this; 511 } 512 513 /** 514 * Sets the field name used to reference this file item. 515 * 516 * @param fieldName The name of the form field. 517 * @see #getFieldName() 518 */ 519 @Override 520 public DiskFileItem setFieldName(final String fieldName) { 521 this.fieldName = fieldName; 522 return this; 523 } 524 525 /** 526 * Specifies whether or not a {@code FileItem} instance represents a simple form field. 527 * 528 * @param state {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded file. 529 * @see #isFormField() 530 */ 531 @Override 532 public DiskFileItem setFormField(final boolean state) { 533 isFormField = state; 534 return this; 535 } 536 537 /** 538 * Sets the file item headers. 539 * 540 * @param headers The file items headers. 541 */ 542 @Override 543 public DiskFileItem setHeaders(final FileItemHeaders headers) { 544 this.fileItemHeaders = headers; 545 return this; 546 } 547 548 /** 549 * Returns a string representation of this object. 550 * 551 * @return a string representation of this object. 552 */ 553 @Override 554 public String toString() { 555 return String.format("name=%s, StoreLocation=%s, size=%s bytes, isFormField=%s, FieldName=%s", getName(), getPath(), getSize(), isFormField(), 556 getFieldName()); 557 } 558 559 /** 560 * Writes an uploaded item to disk. 561 * <p> 562 * The client code is not concerned with whether or not the item is stored in memory, or on disk in a temporary location. They just want to write the 563 * uploaded item to a file. 564 * </p> 565 * <p> 566 * This implementation first attempts to rename the uploaded item to the specified destination file, if the item was originally written to disk. Otherwise, 567 * the data will be copied to the specified file. 568 * </p> 569 * <p> 570 * This method is only guaranteed to work <em>once</em>, the first time it is invoked for a particular item. This is because, in the event that the method 571 * renames a temporary file, that file will no longer be available to copy or rename again at a later time. 572 * </p> 573 * 574 * @param file The {@code File} into which the uploaded item should be stored. 575 * @throws IOException if an error occurs. 576 */ 577 @Override 578 public DiskFileItem write(final Path file) throws IOException { 579 if (isInMemory()) { 580 try (var fout = Files.newOutputStream(file)) { 581 fout.write(get()); 582 } catch (final IOException e) { 583 throw new IOException("Unexpected output data", e); 584 } 585 } else { 586 final var outputFile = getPath(); 587 if (outputFile == null) { 588 /* 589 * For whatever reason we cannot write the file to disk. 590 */ 591 throw new FileUploadException("Cannot write uploaded file to disk."); 592 } 593 // Save the length of the file 594 size = Files.size(outputFile); 595 // 596 // The uploaded file is being stored on disk in a temporary location so move it to the desired file. 597 // 598 Files.move(outputFile, file, StandardCopyOption.REPLACE_EXISTING); 599 } 600 return this; 601 } 602}