NiFi PutFile Processor 代码注释版
1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.nifi.processors.standard; 18 19 import org.apache.nifi.annotation.behavior.EventDriven; 20 import org.apache.nifi.annotation.behavior.InputRequirement; 21 import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; 22 import org.apache.nifi.annotation.behavior.ReadsAttribute; 23 import org.apache.nifi.annotation.behavior.Restricted; 24 import org.apache.nifi.annotation.behavior.Restriction; 25 import org.apache.nifi.annotation.behavior.SupportsBatching; 26 import org.apache.nifi.annotation.documentation.CapabilityDescription; 27 import org.apache.nifi.annotation.documentation.SeeAlso; 28 import org.apache.nifi.annotation.documentation.Tags; 29 import org.apache.nifi.components.PropertyDescriptor; 30 import org.apache.nifi.components.RequiredPermission; 31 import org.apache.nifi.components.ValidationContext; 32 import org.apache.nifi.components.ValidationResult; 33 import org.apache.nifi.components.Validator; 34 import org.apache.nifi.expression.ExpressionLanguageScope; 35 import org.apache.nifi.flowfile.FlowFile; 36 import org.apache.nifi.flowfile.attributes.CoreAttributes; 37 import org.apache.nifi.logging.ComponentLog; 38 
import org.apache.nifi.processor.AbstractProcessor; 39 import org.apache.nifi.processor.ProcessContext; 40 import org.apache.nifi.processor.ProcessSession; 41 import org.apache.nifi.processor.ProcessorInitializationContext; 42 import org.apache.nifi.processor.Relationship; 43 import org.apache.nifi.processor.exception.ProcessException; 44 import org.apache.nifi.processor.util.StandardValidators; 45 import org.apache.nifi.util.StopWatch; 46 47 import java.nio.file.Files; 48 import java.nio.file.Path; 49 import java.nio.file.Paths; 50 import java.nio.file.attribute.PosixFileAttributeView; 51 import java.nio.file.attribute.PosixFilePermissions; 52 import java.nio.file.attribute.UserPrincipalLookupService; 53 import java.text.DateFormat; 54 import java.text.SimpleDateFormat; 55 import java.util.Arrays; 56 import java.util.ArrayList; 57 import java.util.Collections; 58 import java.util.Date; 59 import java.util.HashSet; 60 import java.util.List; 61 import java.util.Locale; 62 import java.util.Set; 63 import java.util.concurrent.TimeUnit; 64 import java.util.regex.Matcher; 65 import java.util.regex.Pattern; 66 67 @EventDriven // Processor Behavior Annotations Processor can be scheduled using the Event-Driven scheduling strategy 68 @SupportsBatching // Processor Behavior Annotations - This annotation indicates that it is okay for the framework to batch together multiple ProcessSession commits into a single commit 69 @InputRequirement(Requirement.INPUT_REQUIRED) // Incoming connection is required 70 @Tags({"put", "local", "copy", "archive", "files", "filesystem"}) // Comments in processor 71 @CapabilityDescription("Writes the contents of a FlowFile to the local file system") // Comments in processor show in "Description" 72 @SeeAlso({FetchFile.class, GetFile.class}) // Comments in processor show in "Description" 73 @ReadsAttribute(attribute = "filename", description = "The filename to use when writing the FlowFile to disk.") // Comments in processor show in "Read 
Attributes" 74 @Restricted( 75 restrictions = { 76 @Restriction( 77 requiredPermission = RequiredPermission.WRITE_FILESYSTEM, 78 explanation = "Provides operator the ability to write to any file that NiFi has access to.") 79 } 80 ) // Comments in processor show in "Restricted" 81 public class PutFile extends AbstractProcessor { // processor should implement AbstractProcessor 82 83 public static final String REPLACE_RESOLUTION = "replace"; 84 public static final String IGNORE_RESOLUTION = "ignore"; 85 public static final String FAIL_RESOLUTION = "fail"; 86 87 public static final String FILE_MODIFY_DATE_ATTRIBUTE = "file.lastModifiedTime"; 88 public static final String FILE_MODIFY_DATE_ATTR_FORMAT = "yyyy-MM-dd'T'HH:mm:ssZ"; 89 90 public static final Pattern RWX_PATTERN = Pattern.compile("^([r-][w-])([x-])([r-][w-])([x-])([r-][w-])([x-])$"); 91 public static final Pattern NUM_PATTERN = Pattern.compile("^[0-7]{3}$"); 92 93 // A PropertyDescriptor MUST specify one or more Validators that can be used to ensure that the user-entered value for a property is valid 94 // This is a self design validator 95 private static final Validator PERMISSIONS_VALIDATOR = new Validator() { 96 @Override 97 public ValidationResult validate(String subject, String input, ValidationContext context) { // 98 ValidationResult.Builder vr = new ValidationResult.Builder(); 99 if (context.isExpressionLanguagePresent(input)) { 100 return new ValidationResult.Builder().subject(subject).input(input).explanation("Expression Language Present").valid(true).build(); 101 } 102 103 if (RWX_PATTERN.matcher(input).matches() || NUM_PATTERN.matcher(input).matches()) { 104 return vr.valid(true).build(); 105 } 106 return vr.valid(false) 107 .subject(subject) 108 .input(input) 109 .explanation("This must be expressed in rwxr-x--- form or octal triplet form.") 110 .build(); 111 } 112 }; 113 114 // All the properties will be shown in Configure -> Properties 115 // Define Property Description 116 public static final 
PropertyDescriptor DIRECTORY = new PropertyDescriptor.Builder() 117 .name("Directory") 118 .description("The directory to which files should be written. You may use expression language such as /aa/bb/${path}") 119 .required(true) 120 .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) 121 .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) 122 .build(); 123 public static final PropertyDescriptor MAX_DESTINATION_FILES = new PropertyDescriptor.Builder() 124 .name("Maximum File Count") 125 .description("Specifies the maximum number of files that can exist in the output directory") 126 .required(false) 127 .addValidator(StandardValidators.INTEGER_VALIDATOR) 128 .build(); 129 public static final PropertyDescriptor CONFLICT_RESOLUTION = new PropertyDescriptor.Builder() 130 .name("Conflict Resolution Strategy") 131 .description("Indicates what should happen when a file with the same name already exists in the output directory") 132 .required(true) 133 .defaultValue(FAIL_RESOLUTION) 134 .allowableValues(REPLACE_RESOLUTION, IGNORE_RESOLUTION, FAIL_RESOLUTION) 135 .build(); 136 public static final PropertyDescriptor CHANGE_LAST_MODIFIED_TIME = new PropertyDescriptor.Builder() 137 .name("Last Modified Time") 138 .description("Sets the lastModifiedTime on the output file to the value of this attribute. Format must be yyyy-MM-dd'T'HH:mm:ssZ. " 139 + "You may also use expression language such as ${file.lastModifiedTime}.") 140 .required(false) 141 .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) 142 .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) 143 .build(); 144 public static final PropertyDescriptor CHANGE_PERMISSIONS = new PropertyDescriptor.Builder() 145 .name("Permissions") 146 .description("Sets the permissions on the output file to the value of this attribute. Format must be either UNIX rwxrwxrwx with a - in " 147 + "place of denied permissions (e.g. rw-r--r--) or an octal number (e.g. 644). 
You may also use expression language such as " 148 + "${file.permissions}.") 149 .required(false) 150 .addValidator(PERMISSIONS_VALIDATOR) 151 .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) 152 .build(); 153 public static final PropertyDescriptor CHANGE_OWNER = new PropertyDescriptor.Builder() 154 .name("Owner") 155 .description("Sets the owner on the output file to the value of this attribute. You may also use expression language such as " 156 + "${file.owner}. Note on many operating systems Nifi must be running as a super-user to have the permissions to set the file owner.") 157 .required(false) 158 .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) 159 .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) 160 .build(); 161 public static final PropertyDescriptor CHANGE_GROUP = new PropertyDescriptor.Builder() 162 .name("Group") 163 .description("Sets the group on the output file to the value of this attribute. You may also use expression language such " 164 + "as ${file.group}.") 165 .required(false) 166 .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) 167 .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) 168 .build(); 169 public static final PropertyDescriptor CREATE_DIRS = new PropertyDescriptor.Builder() 170 .name("Create Missing Directories") 171 .description("If true, then missing destination directories will be created. 
If false, flowfiles are penalized and sent to failure.") 172 .required(true) 173 .allowableValues("true", "false") 174 .defaultValue("true") 175 .build(); 176 177 public static final int MAX_FILE_LOCK_ATTEMPTS = 10; 178 179 // Define relationship 180 public static final Relationship REL_SUCCESS = new Relationship.Builder() 181 .name("success") 182 .description("Files that have been successfully written to the output directory are transferred to this relationship") 183 .build(); 184 public static final Relationship REL_FAILURE = new Relationship.Builder() 185 .name("failure") 186 .description("Files that could not be written to the output directory for some reason are transferred to this relationship") 187 .build(); 188 189 private List<PropertyDescriptor> properties; 190 private Set<Relationship> relationships; 191 192 // Init the relationships and descriptors 193 @Override 194 protected void init(final ProcessorInitializationContext context) { 195 // relationships 196 final Set<Relationship> procRels = new HashSet<>(); 197 procRels.add(REL_SUCCESS); 198 procRels.add(REL_FAILURE); 199 relationships = Collections.unmodifiableSet(procRels); // relationships can't be changed after init 200 201 // descriptors 202 final List<PropertyDescriptor> supDescriptors = new ArrayList<>(); 203 supDescriptors.add(DIRECTORY); 204 supDescriptors.add(CONFLICT_RESOLUTION); 205 supDescriptors.add(CREATE_DIRS); 206 supDescriptors.add(MAX_DESTINATION_FILES); 207 supDescriptors.add(CHANGE_LAST_MODIFIED_TIME); 208 supDescriptors.add(CHANGE_PERMISSIONS); 209 supDescriptors.add(CHANGE_OWNER); 210 supDescriptors.add(CHANGE_GROUP); 211 properties = Collections.unmodifiableList(supDescriptors); // properties can't be changed after init 212 } 213 214 @Override 215 public Set<Relationship> getRelationships() { 216 return relationships; 217 } 218 219 @Override 220 protected List<PropertyDescriptor> getSupportedPropertyDescriptors() { 221 return properties; 222 } 223 224 @Override 225 // 
ProcessContext provides a bridge between a processor and the framework. It can provide the information of a processor 226 // ProcessSession can create, destroy, examine, clone, transfer FlowFile, also can edit FlowFile including edit content and attributes 227 public void onTrigger(final ProcessContext context, final ProcessSession session) { 228 FlowFile flowFile = session.get(); 229 if (flowFile == null) { 230 return; 231 } 232 233 // used to "Reporting Processor Activity" 234 // StopWatch is used to record the time costed during the period 235 final StopWatch stopWatch = new StopWatch(true); 236 // Processors should log events via the ComponentLog, which is accessible via the InitializationContext or by calling the getLogger method of AbstractProcessor. 237 final ComponentLog logger = getLogger(); 238 239 // get the parameters from the configure property 240 final Path configuredRootDirPath = Paths.get(context.getProperty(DIRECTORY).evaluateAttributeExpressions(flowFile).getValue()); 241 final String conflictResponse = context.getProperty(CONFLICT_RESOLUTION).getValue(); 242 final Integer maxDestinationFiles = context.getProperty(MAX_DESTINATION_FILES).asInteger(); 243 244 Path tempDotCopyFile = null; 245 try { 246 final Path rootDirPath = configuredRootDirPath; 247 String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key()); 248 final Path tempCopyFile = rootDirPath.resolve("." 
+ filename); 249 final Path copyFile = rootDirPath.resolve(filename); // return the absolute path 250 251 final String permissions = context.getProperty(CHANGE_PERMISSIONS).evaluateAttributeExpressions(flowFile).getValue(); // evaluateAttributeExpression 252 final String owner = context.getProperty(CHANGE_OWNER).evaluateAttributeExpressions(flowFile).getValue(); 253 final String group = context.getProperty(CHANGE_GROUP).evaluateAttributeExpressions(flowFile).getValue(); 254 if (!Files.exists(rootDirPath)) { 255 if (context.getProperty(CREATE_DIRS).asBoolean()) { 256 Path existing = rootDirPath; 257 while (!Files.exists(existing)) { 258 existing = existing.getParent(); 259 } 260 if (permissions != null && !permissions.trim().isEmpty()) { 261 try { 262 String perms = stringPermissions(permissions, true); 263 if (!perms.isEmpty()) { 264 Files.createDirectories(rootDirPath, PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString(perms))); 265 } else { 266 Files.createDirectories(rootDirPath); 267 } 268 } catch (Exception e) { 269 flowFile = session.penalize(flowFile); // Sets a penalty for the given FlowFile which will make it unavailable to be operated on any further during the penalty period. 
270 session.transfer(flowFile, REL_FAILURE); // FlowFile is transferred to next stage with Fail relationship 271 logger.error("Could not set create directory with permissions {} because {}", new Object[]{permissions, e}); 272 return; 273 } 274 } else { 275 Files.createDirectories(rootDirPath); 276 } 277 278 boolean chOwner = owner != null && !owner.trim().isEmpty(); 279 boolean chGroup = group != null && !group.trim().isEmpty(); 280 if (chOwner || chGroup) { 281 Path currentPath = rootDirPath; 282 while (!currentPath.equals(existing)) { 283 if (chOwner) { 284 try { 285 UserPrincipalLookupService lookupService = currentPath.getFileSystem().getUserPrincipalLookupService(); 286 Files.setOwner(currentPath, lookupService.lookupPrincipalByName(owner)); 287 } catch (Exception e) { 288 logger.warn("Could not set directory owner to {} because {}", new Object[]{owner, e}); 289 } 290 } 291 if (chGroup) { 292 try { 293 UserPrincipalLookupService lookupService = currentPath.getFileSystem().getUserPrincipalLookupService(); 294 PosixFileAttributeView view = Files.getFileAttributeView(currentPath, PosixFileAttributeView.class); 295 view.setGroup(lookupService.lookupPrincipalByGroupName(group)); 296 } catch (Exception e) { 297 logger.warn("Could not set file group to {} because {}", new Object[]{group, e}); 298 } 299 } 300 currentPath = currentPath.getParent(); 301 } 302 } 303 } else { 304 flowFile = session.penalize(flowFile); 305 session.transfer(flowFile, REL_FAILURE); 306 logger.error("Penalizing {} and routing to 'failure' because the output directory {} does not exist and Processor is " 307 + "configured not to create missing directories", new Object[]{flowFile, rootDirPath}); 308 return; 309 } 310 } 311 312 final Path dotCopyFile = tempCopyFile; 313 tempDotCopyFile = dotCopyFile; 314 Path finalCopyFile = copyFile; 315 316 final Path finalCopyFileDir = finalCopyFile.getParent(); 317 if (Files.exists(finalCopyFileDir) && maxDestinationFiles != null) { // check if too many 
files already 318 final long numFiles = getFilesNumberInFolder(finalCopyFileDir, filename); 319 320 if (numFiles >= maxDestinationFiles) { 321 flowFile = session.penalize(flowFile); 322 logger.warn("Penalizing {} and routing to 'failure' because the output directory {} has {} files, which exceeds the " 323 + "configured maximum number of files", new Object[]{flowFile, finalCopyFileDir, numFiles}); 324 session.transfer(flowFile, REL_FAILURE); 325 return; 326 } 327 } 328 329 if (Files.exists(finalCopyFile)) { 330 switch (conflictResponse) { 331 case REPLACE_RESOLUTION: 332 Files.delete(finalCopyFile); 333 logger.info("Deleted {} as configured in order to replace with the contents of {}", new Object[]{finalCopyFile, flowFile}); 334 break; 335 case IGNORE_RESOLUTION: 336 session.transfer(flowFile, REL_SUCCESS); 337 logger.info("Transferring {} to success because file with same name already exists", new Object[]{flowFile}); 338 return; 339 case FAIL_RESOLUTION: 340 flowFile = session.penalize(flowFile); 341 logger.warn("Penalizing {} and routing to failure as configured because file with the same name already exists", new Object[]{flowFile}); 342 session.transfer(flowFile, REL_FAILURE); 343 return; 344 default: 345 break; 346 } 347 } 348 349 session.exportTo(flowFile, dotCopyFile, false); // Writes the content of the given FlowFile to the given destination path 350 351 final String lastModifiedTime = context.getProperty(CHANGE_LAST_MODIFIED_TIME).evaluateAttributeExpressions(flowFile).getValue(); 352 if (lastModifiedTime != null && !lastModifiedTime.trim().isEmpty()) { 353 try { 354 final DateFormat formatter = new SimpleDateFormat(FILE_MODIFY_DATE_ATTR_FORMAT, Locale.US); 355 final Date fileModifyTime = formatter.parse(lastModifiedTime); 356 dotCopyFile.toFile().setLastModified(fileModifyTime.getTime()); 357 } catch (Exception e) { 358 logger.warn("Could not set file lastModifiedTime to {} because {}", new Object[]{lastModifiedTime, e}); 359 } 360 } 361 362 if 
(permissions != null && !permissions.trim().isEmpty()) { 363 try { 364 String perms = stringPermissions(permissions, false); 365 if (!perms.isEmpty()) { 366 Files.setPosixFilePermissions(dotCopyFile, PosixFilePermissions.fromString(perms)); 367 } 368 } catch (Exception e) { 369 logger.warn("Could not set file permissions to {} because {}", new Object[]{permissions, e}); 370 } 371 } 372 373 if (owner != null && !owner.trim().isEmpty()) { 374 try { 375 UserPrincipalLookupService lookupService = dotCopyFile.getFileSystem().getUserPrincipalLookupService(); 376 Files.setOwner(dotCopyFile, lookupService.lookupPrincipalByName(owner)); 377 } catch (Exception e) { 378 logger.warn("Could not set file owner to {} because {}", new Object[]{owner, e}); 379 } 380 } 381 382 if (group != null && !group.trim().isEmpty()) { 383 try { 384 UserPrincipalLookupService lookupService = dotCopyFile.getFileSystem().getUserPrincipalLookupService(); 385 PosixFileAttributeView view = Files.getFileAttributeView(dotCopyFile, PosixFileAttributeView.class); 386 view.setGroup(lookupService.lookupPrincipalByGroupName(group)); 387 } catch (Exception e) { 388 logger.warn("Could not set file group to {} because {}", new Object[]{group, e}); 389 } 390 } 391 392 boolean renamed = false; 393 for (int i = 0; i < 10; i++) { // try rename up to 10 times. 
394 if (dotCopyFile.toFile().renameTo(finalCopyFile.toFile())) { // rename file dotCopyFile is temple filename, 395 renamed = true; 396 break;// rename was successful 397 } 398 Thread.sleep(100L);// try waiting a few ms to let whatever might cause rename failure to resolve 399 } 400 401 if (!renamed) { 402 if (Files.exists(dotCopyFile) && dotCopyFile.toFile().delete()) { 403 logger.debug("Deleted dot copy file {}", new Object[]{dotCopyFile}); 404 } 405 throw new ProcessException("Could not rename: " + dotCopyFile); 406 } else { 407 logger.info("Produced copy of {} at location {}", new Object[]{flowFile, finalCopyFile}); 408 } 409 410 // for "reporting processor activity purpose" 411 session.getProvenanceReporter().send(flowFile, finalCopyFile.toFile().toURI().toString(), stopWatch.getElapsed(TimeUnit.MILLISECONDS)); 412 session.transfer(flowFile, REL_SUCCESS); 413 } catch (final Throwable t) { 414 if (tempDotCopyFile != null) { 415 try { 416 Files.deleteIfExists(tempDotCopyFile); 417 } catch (final Exception e) { 418 logger.error("Unable to remove temporary file {} due to {}", new Object[]{tempDotCopyFile, e}); 419 } 420 } 421 422 flowFile = session.penalize(flowFile); 423 logger.error("Penalizing {} and transferring to failure due to {}", new Object[]{flowFile, t}); 424 session.transfer(flowFile, REL_FAILURE); 425 } 426 } 427 428 private long getFilesNumberInFolder(Path folder, String filename) { 429 String[] filesInFolder = folder.toFile().list(); 430 return Arrays.stream(filesInFolder) 431 .filter(eachFilename -> !eachFilename.equals(filename)) 432 .count(); 433 } 434 435 // It's used to grant permission for the files 436 protected String stringPermissions(String perms, boolean directory) { 437 String permissions = ""; 438 Matcher rwx = RWX_PATTERN.matcher(perms); 439 if (rwx.matches()) { 440 if (directory) { 441 // To read or write, directory access will be required 442 StringBuilder permBuilder = new StringBuilder(); 443 permBuilder.append("$1"); 444 
permBuilder.append(rwx.group(1).equals("--") ? "$2" : "x"); 445 permBuilder.append("$3"); 446 permBuilder.append(rwx.group(3).equals("--") ? "$4" : "x"); 447 permBuilder.append("$5"); 448 permBuilder.append(rwx.group(5).equals("--") ? "$6" : "x"); 449 permissions = rwx.replaceAll(permBuilder.toString()); 450 } else { 451 permissions = perms; 452 } 453 } else if (NUM_PATTERN.matcher(perms).matches()) { 454 try { 455 int number = Integer.parseInt(perms, 8); 456 StringBuilder permBuilder = new StringBuilder(); 457 if ((number & 0x100) > 0) { 458 permBuilder.append('r'); 459 } else { 460 permBuilder.append('-'); 461 } 462 if ((number & 0x80) > 0) { 463 permBuilder.append('w'); 464 } else { 465 permBuilder.append('-'); 466 } 467 if (directory || (number & 0x40) > 0) { 468 permBuilder.append('x'); 469 } else { 470 permBuilder.append('-'); 471 } 472 if ((number & 0x20) > 0) { 473 permBuilder.append('r'); 474 } else { 475 permBuilder.append('-'); 476 } 477 if ((number & 0x10) > 0) { 478 permBuilder.append('w'); 479 } else { 480 permBuilder.append('-'); 481 } 482 if ((number & 0x8) > 0) { 483 permBuilder.append('x'); 484 } else { 485 if (directory && (number & 0x30) > 0) { 486 // To read or write, directory access will be required 487 permBuilder.append('x'); 488 } else { 489 permBuilder.append('-'); 490 } 491 } 492 if ((number & 0x4) > 0) { 493 permBuilder.append('r'); 494 } else { 495 permBuilder.append('-'); 496 } 497 if ((number & 0x2) > 0) { 498 permBuilder.append('w'); 499 } else { 500 permBuilder.append('-'); 501 } 502 if ((number & 0x1) > 0) { 503 permBuilder.append('x'); 504 } else { 505 if (directory && (number & 0x6) > 0) { 506 // To read or write, directory access will be required 507 permBuilder.append('x'); 508 } else { 509 permBuilder.append('-'); 510 } 511 } 512 permissions = permBuilder.toString(); 513 } catch (NumberFormatException ignore) { 514 } 515 } 516 517 return permissions; 518 } 519 }