Ranger记录HMS事件过慢问题

1、定位到过慢的代码:
从 HMS 拉取的大量事件中,有很多并不需要 ranger-admin 处理(如 ADD_PARTITION 等),但 Ranger 仍然会把这些事件的 id 存储到 m_hms_event_id 表中。
2、改造后的代码:
(更改前提:processNotifications 方法的第二个参数 List<MNotificationEvent> events 是有序的,按 eventId 升序排列;已确认 HMS 接口返回的结果为升序。)
只有被实际处理的事件,以及事件集合中的最后一个事件,才需要把事件 id 保存到 ranger-admin 的 m_hms_event_id 表;其余事件的 id 都不用存储。新增的过滤逻辑如下(if (isNotificationProcessed || i == events.size() - 1)):
 
      
/*
 Licensed to the Apache Software Foundation (ASF) under one
 or more contributor license agreements.  See the NOTICE file
 distributed with this work for additional information
 regarding copyright ownership.  The ASF licenses this file
 to you under the Apache License, Version 2.0 (the
 "License"); you may not use this file except in compliance
 with the License.  You may obtain a copy of the License at
 
 http://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
*/
 
package org.apache.ranger.sync.hms.service.thrift;
 
import static com.msxf.ranger.sync.hms.common.HMSSyncConstants.*;
 
import com.google.common.collect.ImmutableMap;
import com.msxf.bdp.vp.VersionProxyService;
import com.msxf.bdp.vp.VersionProxyServiceFactory;
import com.msxf.ranger.sync.hms.common.model.PathsImage;
import com.msxf.ranger.sync.hms.common.model.ProfileInfo;
import com.msxf.ranger.sync.hms.common.model.message.IMessageDeserializer;
import com.msxf.ranger.sync.hms.common.util.HmsSyncPropertiesUtil;
import com.msxf.ranger.sync.hms.thrift.ISyncHMSClient;
import com.msxf.ranger.sync.hms.thrift.MNotificationEvent;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.ranger.biz.RangerBizUtil;
import org.apache.ranger.biz.ServiceDBStore;
import org.apache.ranger.common.PropertiesUtil;
import org.apache.ranger.common.db.RangerTransactionSynchronizationAdapter;
import org.apache.ranger.service.RangerPolicyService;
import org.apache.ranger.sync.common.SyncConfiguration;
import org.apache.ranger.sync.common.convert.HdfsPolicySyncHelper;
import org.apache.ranger.sync.common.convert.SyncConvertHelper;
import org.apache.ranger.sync.common.entity.HmsSyncParam;
import org.apache.ranger.sync.common.exception.HmsServiceException;
import org.apache.ranger.sync.common.persistent.RangerSyncStore;
import org.apache.ranger.sync.common.state.SyncStateBank;
import org.apache.ranger.sync.hms.common.exception.HmsSyncException;
import org.apache.ranger.sync.hms.common.model.HMSSyncState;
import org.apache.ranger.sync.hms.common.util.HmsRecordLogger;
import org.apache.ranger.sync.hms.processor.NotificationProcessor;
import org.eclipse.persistence.jpa.rs.exceptions.JPARSException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
 
/**
 * HMSFollower follows the Hive MetaStore state changes from Ranger. It gets the full update and
 * notification logs from HMS and applies them to update the permissions stored in Ranger using
 * rangerSyncStore, and also updates the &lt;obj, path&gt; state stored for HDFS-Ranger sync.
 */
@Component
public class HMSFollower extends SyncConfiguration {
 
  /** Class-wide SLF4J logger. */
  private static final Logger LOGGER = LoggerFactory.getLogger(HMSFollower.class);
  /** Profile name to its {@link ProfileInfo}; populated once per profile by the constructor. */
  private final Map<String, ProfileInfo> profileInfoMap = new HashMap<>();
  /** Profile name to its initialized HMS client proxy; populated by the constructor. */
  private final Map<String, ISyncHMSClient> syncHMSClientMap = new HashMap<>();
 
  public Map<String, ISyncHMSClient> getSyncHMSClientMap() {
    return ImmutableMap.copyOf(syncHMSClientMap);
  }
 
  /** Fetches notification events from HMS; built in the constructor from {@code syncHMSClientMap}. */
  private final NotificationFetcher notificationFetcher;
  /** Applies full snapshots and single notification events; built in the constructor. */
  private final NotificationProcessor notificationProcessor;
  /**
   * Per-profile "forced full update" flag. Initialized to {@code false} in the constructor and
   * consumed (reset to {@code false}) by {@code isFullSnapshotRequired}.
   */
  private final Map<String, AtomicBoolean> fullUpdateHMSMap = new HashMap<>();
  /**
   * Per-profile in-memory copy of the last processed event id, seeded in the constructor from
   * the id persisted in the sync store.
   */
  private final Map<String, AtomicLong> lastProcessedEventIdMap = new ConcurrentHashMap<>();
 
  @Autowired
  public HMSFollower(
      RangerTransactionSynchronizationAdapter txSyncAdapter,
      RangerSyncStore rangerSyncStore,
      ServiceDBStore rangerSvcStore,
      RangerPolicyService rangerPolicyService,
      RangerBizUtil rangerBizUtil,
      SyncConvertHelper syncConvertHelper,
      HdfsPolicySyncHelper hdfsPolicySyncHelper) {
    LOGGER.info("HMSFollower is being initializing");
 
    // 公共配置
    if (PropertiesUtil.getPropertiesMap().containsKey(RANGER_SYNC_METASTORE_URIS)) {
      LOGGER.warn(
          "{} is deprecated. Use {}", RANGER_SYNC_METASTORE_URIS, RANGER_SYNC_METASTORE_PROFILES);
    }
 
    String[] profiles = PropertiesUtil.getPropertyStringList(RANGER_SYNC_METASTORE_PROFILES);
    if (profiles.length == 0) {
      throw new IllegalArgumentException("There is no configured profile");
    }
 
    HmsSyncPropertiesUtil hmsSyncPropertiesUtil =
        new HmsSyncPropertiesUtil(PropertiesUtil.getProps());
 
    VersionProxyService hmsClientProxyService;
    VersionProxyService hmsDeserializerProxyService;
    try {
      String configPath = PropertiesUtil.getProperty(RANGER_SYNC_VERSION_PROXY_CONFIG_PATH);
      if (StringUtils.isEmpty(configPath)) {
        throw new IllegalArgumentException("Version proxy is no configured");
      }
      
      VersionProxyServiceFactory proxyServiceFactory =
          VersionProxyServiceFactory.newBuilder()
              .configLocation(configPath)
              .build();
      hmsClientProxyService = proxyServiceFactory.get(ISyncHMSClient.class);
      hmsDeserializerProxyService = proxyServiceFactory.get(IMessageDeserializer.class);
    } catch (Exception e) {
      throw new RuntimeException("Version proxy service init failed", e);
    }
    for (String profile : profiles) {
      SyncStateBank.disableState(profile, HMSSyncState.UNINITIALIZED);
      SyncStateBank.enableState(profile, HMSSyncState.STARTING);
      // 加载 profile 信息
      String hmsVersion =
          Optional.ofNullable(PropertiesUtil.getProperty(buildProfileHmsVersionKey(profile)))
              .orElseThrow(
                  () ->
                      new IllegalArgumentException(
                          "Profile[" + profile + "] has no mapping hms version"));
      ProfileInfo profileInfo = new ProfileInfo(profile, hmsVersion, hmsDeserializerProxyService.createProxy(profile));
      String[] catalogs =
          PropertiesUtil.getPropertyStringList(
              buildProfileCatalogsKey(profile), RANGER_SYNC_METASTORE_PROFILE_CATALOG_DEFAULT);
      for (String catalog : catalogs) {
        ProfileInfo.CatalogService catalogService = new ProfileInfo.CatalogService();
        catalogService.setHiveServiceName(
            Optional.ofNullable(
                    PropertiesUtil.getProperty(buildHiveServiceNameKey(profile, catalog)))
                .orElseThrow(
                    () ->
                        new IllegalArgumentException(
                            "Catalog[" + catalog + "] has no mapping hive service")));
        catalogService.setHdfsServiceName(
            Optional.ofNullable(
                    PropertiesUtil.getProperty(buildHdfsServiceNameKey(profile, catalog)))
                .orElseThrow(
                    () ->
                        new IllegalArgumentException(
                            "Catalog[" + catalog + "] has no mapping hdfs service")));
        profileInfo.addCatalogServiceMapping(catalog, catalogService);
      }
      profileInfoMap.put(profile, profileInfo);
 
      try {
 
        lastProcessedEventIdMap.put(
            profile, new AtomicLong(rangerSyncStore.getLastPersistedNotificationID(profile)));
        fullUpdateHMSMap.put(profile, new AtomicBoolean(false));
 
        ISyncHMSClient hmsProxyServiceProxy = hmsClientProxyService.createProxy(profile);
        hmsProxyServiceProxy.init(profile, hmsSyncPropertiesUtil, subject);
        syncHMSClientMap.put(profile, hmsProxyServiceProxy);
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
      SyncStateBank.enableState(profile, HMSSyncState.STARTED);
    }
 
    hdfsPolicySyncHelper.profileInfoMap = profileInfoMap;
    notificationFetcher = new NotificationFetcher(syncHMSClientMap);
    notificationProcessor =
        new NotificationProcessor(
            txSyncAdapter,
            rangerSvcStore,
            rangerPolicyService,
            rangerBizUtil,
            syncConvertHelper,
            hdfsPolicySyncHelper,
            syncHMSClientMap);
 
    LOGGER.info("HMSFollower initialize successful");
  }
 
  /**
   * Processes new Hive Metastore notifications.
   *
   * <p>If no notifications are processed yet, then it does a full initial snapshot of the Hive
   * Metastore followed by new notifications updates that could have happened after it.
   */
  public void syncUpWithHms(String profile) {
    if (!initTx()) {
      return;
    }
    // session 是和线程绑定的
    if (!initSession()) {
      LOGGER.info("Session initialize failed, skipping sync");
      return;
    }
 
    long notificationId = lastProcessedEventIdMap.get(profile).get();
    if (SyncStateBank.isEnabled(profile, HMSSyncState.PAUSED)) {
      LOGGER.info(
          "[{}]HMS sync is suspend, event with notification id {} is delay",
          profile,
          notificationId);
      return;
    }
 
    try {
      syncHMSClientMap.get(profile).connect();
      SyncStateBank.enableState(profile, HMSSyncState.CONNECTED);
    } catch (Throwable e) {
      LOGGER.error("[" + profile + "]HMSFollower cannot connect to HMS!!", e);
      return;
    }
 
    try {
      /* Before getting notifications, it checks if a full HMS snapshot is required. */
      if (isFullSnapshotRequired(profile, notificationId)) {
        if (PropertiesUtil.getBooleanProperty(
            buildProfileInitialDeltaKey(profile), RANGER_SYNC_METASTORE_INITIAL_IS_DELTA_DEFAULT)) {
          notificationId =
              syncHMSClientMap.get(profile).getCurrentNotificationEventId()
                  - 1; // 拉最新的一条,为了记录下event id
          if (SyncStateBank.isEnabled(profile, HMSSyncState.FULL_UPDATE_RUNNING)) {
            SyncStateBank.disableState(profile, HMSSyncState.FULL_UPDATE_RUNNING);
          }
        } else {
          createFullSnapshot(profile);
          return;
        }
      }
      // 设置最大拉取500条
      int pollMax = PropertiesUtil.getIntProperty(RANGER_SYNC_PULL_BATCH_MAX, 500);
      List<MNotificationEvent> notifications =
          notificationFetcher.fetchNotifications(profile, notificationId, pollMax);
 
      // After getting notifications, it checks if the HMS did some clean-up
      // and notifications are out-of-sync with Ranger.
      if (areNotificationsOutOfSync(profile, notifications, notificationId)) {
        createFullSnapshot(profile);
        return;
      }
 
      // Continue with processing new notifications if no snapshots are done.
      processNotifications(profile, notifications);
    } catch (Throwable t) {
      // catching errors to prevent the executor to halt.
      LOGGER.error("Exception in HMSFollower! Caused by: " + t.getMessage(), t);
      close();
    }
  }
 
  /**
   * Checks if a new full HMS snapshot request is needed by checking if:
   *
   * <ul>
   *   <li>Ranger HMS Notification table is EMPTY
   *   <li>HDFSSync is enabled
   *   <li>The current notification Id on the HMS is less than the latest processed by Ranger.
   *   <li>Full Snapshot Signal is detected
   * </ul>
   *
   * @param latestNotificationId The notification id to check against the HMS
   * @return True if a full snapshot is required; False otherwise.
   * @throws Exception If an error occurs while checking the rangerSyncStore or the HMS client.
   */
  private boolean isFullSnapshotRequired(String profile, long latestNotificationId)
      throws Exception {
    // 是空的说明肯定初始化,只能全量
    if (rangerSyncStore.isHmsNotificationEmpty(profile)) {
      LOGGER.debug(
          "Ranger Store has no HMS Notifications. Create Full HMS Snapshot."
              + "Latest sentry notification Id = {}",
          latestNotificationId);
      return true;
    }
 
    long currentHmsNotificationId = notificationFetcher.getCurrentNotificationId(profile);
    if (currentHmsNotificationId < latestNotificationId) {
      LOGGER.info(
          "The current notification ID on HMS = {} is less than the latest processed notification ID = {}. "
              + "Need to request a full HMS snapshot",
          currentHmsNotificationId,
          latestNotificationId);
      return true;
    }
 
    // Check if forced full update is required, reset update flag to false.
    // Only do it once per forced full update request.
    if (fullUpdateHMSMap.get(profile).compareAndSet(true, false)) {
      LOGGER.info("FULL UPDATE TRIGGER: initiating full HMS snapshot request");
      return true;
    }
 
    return false;
  }
 
  /**
   * Checks if the HMS and Ranger processed notifications are out-of-sync. This could happen because
   * the HMS did some clean-up of old notifications and Ranger was not requesting notifications
   * during that time.
   *
   * @param profile
   * @param events All new notifications to check for an out-of-sync.
   * @param latestProcessedId The latest notification processed by Ranger to check against the list
   *     of notifications events.
   * @return True if an out-of-sync is found; False otherwise.
   */
  private boolean areNotificationsOutOfSync(
      String profile, Collection<MNotificationEvent> events, long latestProcessedId) {
    if (events.isEmpty()) {
      return false;
    }
 
    /*
     * If the sequence of notifications has a gap, then an out-of-sync might
     * have happened due to the following issue:
     *
     * - HDFS sync was disabled or Ranger was shutdown for a time period longer than
     * the HMS notification clean-up thread causing old notifications to be deleted.
     *
     * HMS notifications may contain both gaps in the sequence and duplicates
     * (the same ID repeated more than once for different events).
     *
     * To accept duplicates (see NotificationFetcher for more info), then a gap is found
     * if the 1st notification received is higher than the current ID processed + 1.
     * i.e.
     *   1st ID = 3, latest ID = 3 (duplicate found but no gap detected)
     *   1st ID = 4, latest ID = 3 (consecutive ID found but no gap detected)
     *   1st ID = 5, latest ID = 3 (a gap is detected)
     */
 
    List<MNotificationEvent> eventList = (List<MNotificationEvent>) events;
    long firstNotificationId = eventList.get(0).getEventId();
 
    if (firstNotificationId > (latestProcessedId + 1)) {
      LOGGER.info(
          "[{}]First HMS event id = {} is greater than latest Ranger processed id = {} + 1."
              + "Need to request a full HMS snapshot.",
          profile,
          firstNotificationId,
          latestProcessedId);
      return true;
    }
 
    return false;
  }
 
  /**
   * Request for full snapshot and persists it if there is no snapshot available in the sentry
   * store. Also, wakes-up any waiting clients.
   */
  private void createFullSnapshot(String profile) {
    LOGGER.info("[{}]Attempting to take full HMS snapshot", profile);
    try {
      // Set that the full update is running
      if (!SyncStateBank.isEnabled(profile, HMSSyncState.FULL_UPDATE_RUNNING)) {
        SyncStateBank.enableState(profile, HMSSyncState.FULL_UPDATE_RUNNING);
      }
 
      PathsImage snapshotInfo = syncHMSClientMap.get(profile).getFullSnapshot();
      if (snapshotInfo.getPathImage().isEmpty()) {
        LOGGER.debug("Received empty path image from HMS while taking a full snapshot");
        return;
      }
 
      try {
        LOGGER.info(
            "[{}]Persisting full snapshot for notification Id = {}", profile, snapshotInfo.getId());
        notificationProcessor.onFullPathsImage(snapshotInfo, profileInfoMap.get(profile));
      } catch (Exception failure) {
        LOGGER.error("[{}]Received exception while persisting HMS path full snapshot", profile);
        throw failure;
      }
      // HMSFollower connected to HMS, and it finished full snapshot if that was required
      // Log this message only once
      if (rangerSyncStore.persistLastProcessedNotificationID(profile, snapshotInfo.getId(), 1)) {
        lastProcessedEventIdMap.get(profile).set(snapshotInfo.getId());
        LOGGER.info("[{}]Ranger HMS support is ready", profile);
      } else {
        SyncStateBank.enableState(profile, HMSSyncState.PAUSED);
        LOGGER.error(
            "[{}]Event id {} persist failed, HmsFollower set to pause",
            profile,
            snapshotInfo.getId());
      }
    } catch (Exception failure) {
      LOGGER.error("[{}]Received exception while creating HMS path full snapshot", profile);
      throw failure;
    } finally {
      if (SyncStateBank.isEnabled(profile, HMSSyncState.FULL_UPDATE_RUNNING)) {
        SyncStateBank.disableState(profile, HMSSyncState.FULL_UPDATE_RUNNING);
      }
    }
  }
 
  /**
   * Processes a batch of HMS notification events for one profile inside {@code txExecute} and
   * records the id of each event that was handled.
   *
   * <p>The profile is marked INCREMENTAL_UPDATE_RUNNING (if it is not already) before anything
   * else; an empty batch then returns immediately. Events are handled in list order. The loop
   * stops early, without persisting the current event id, when the profile is paused, when
   * processing throws, or when the processor returns {@code false}. Most processing exceptions and
   * any exception while persisting the id switch the profile to PAUSED.
   *
   * <p>After a successful persist the in-memory event id is advanced with a compare-and-set that
   * expects the previous value to be {@code eventId - 1}; if that fails and the profile is not in
   * SKIP_CHANGE, the in-memory id is reset to the persisted one.
   *
   * @param profile name of the HMS profile the events belong to
   * @param events list of events to be processed, in the order they should be applied
   */
  public void processNotifications(String profile, List<MNotificationEvent> events) {
    if (!SyncStateBank.isEnabled(profile, HMSSyncState.INCREMENTAL_UPDATE_RUNNING)) {
      SyncStateBank.enableState(profile, HMSSyncState.INCREMENTAL_UPDATE_RUNNING);
    }
    if (events.isEmpty()) {
      return;
    }
    HmsRecordLogger hmsRecordLogger = new HmsRecordLogger(profile, events);
    hmsRecordLogger.start();
    txExecute(
        () -> {
          boolean isNotificationProcessed;
          for (int i = 0; i < events.size(); i++) {
            MNotificationEvent event = events.get(i);
            // Re-checked on every iteration: an earlier event in this batch (or a failed
            // persist below) may have paused the profile.
            if (SyncStateBank.isEnabled(profile, HMSSyncState.PAUSED)) {
              LOGGER.info("[{}]Hms sync is paused, skip the remaining events", profile);
              break;
            }
            try {
              isNotificationProcessed =
                  notificationProcessor.processNotificationEvent(
                      profileInfoMap.get(profile), event);
            } catch (HmsSyncException e) {
              SyncStateBank.enableState(profile, HMSSyncState.PAUSED);
              SyncStateBank.disableState(profile, HMSSyncState.INCREMENTAL_UPDATE_RUNNING);
              LOGGER.error(
                  "[{}]Event id: {} processing exception, pause sync... ",
                  profile,
                  event.getEventId(),
                  e);
              break;
            } catch (HmsServiceException e) {
              // TODO: service-layer exception
              SyncStateBank.enableState(profile, HMSSyncState.PAUSED);
              SyncStateBank.disableState(profile, HMSSyncState.INCREMENTAL_UPDATE_RUNNING);
              LOGGER.error(
                  "[{}]Processing the notification with ID:{} failed!",
                  profile,
                  event.getEventId(),
                  e);
              break;
            } catch (Exception e) {
              if (e.getCause() instanceof JPARSException) {
                LOGGER.info(
                    "[{}]Received JDO Storage Exception, Could be because of processing duplicate notification",
                    profile);
                // An event at or below the persisted id ends the batch without pausing.
                if (event.getEventId() <= rangerSyncStore.getLastPersistedNotificationID(profile)) {
                  // Rest of the notifications need not be processed.
                  LOGGER.error(
                      "[{}]Received event with Id: {} which is smaller then the ID persisted in store",
                      profile,
                      event.getEventId());
                  break;
                }
              }
              // Every other failure pauses the profile.
              SyncStateBank.enableState(profile, HMSSyncState.PAUSED);
              SyncStateBank.disableState(profile, HMSSyncState.INCREMENTAL_UPDATE_RUNNING);
              LOGGER.error(
                  "[{}]Processing the notification with ID:{} failed!",
                  profile,
                  event.getEventId(),
                  e);
              break;
            }
            // A false result stops the batch here; this event's id is not persisted and the
            // profile is not paused.
            if (!isNotificationProcessed) {
              LOGGER.warn(
                  "[{}]The listener encountered an issue while processing the data of HMSFollower (event ID {}), "
                      + "and the remaining events will not be executed. "
                      + "They will be processed in the next normal batch.",
                  profile,
                  event.getEventId());
              break;
            }
            try {
              // NOTE(review): isNotificationProcessed is always true at this point because the
              // guard above breaks out of the loop when it is false, so this condition never
              // filters anything and every event reaching here is persisted. If the intent is
              // to skip persisting ids of events that need no handling, the return contract of
              // processNotificationEvent has to be confirmed first.
              if (isNotificationProcessed || i == events.size() - 1) {
                if (rangerSyncStore.persistLastProcessedNotificationID(
                        profile, event.getEventId(), 0)) {
                  // Advance the in-memory id only if it still holds the immediately preceding id.
                  long expect = event.getEventId() - 1;
                  if (!lastProcessedEventIdMap
                          .get(profile)
                          .compareAndSet(expect, event.getEventId())) {
                    // Under SKIP_CHANGE a failed compare-and-set leaves the in-memory id as is.
                    if (!SyncStateBank.isEnabled(profile, HMSSyncState.SKIP_CHANGE)) {
                      long lastPersistedNotificationID =
                              rangerSyncStore.getLastPersistedNotificationID(profile);
                      LOGGER.warn(
                              "[{}]The event id maintained by HmsFollower does not self increment "
                                      + "and the state is not in skip. "
                                      + "Expected value is [{}], current value is [{}], reset to [{}]",
                              profile,
                              expect,
                              lastProcessedEventIdMap.get(profile).get(),
                              lastPersistedNotificationID);
                      lastProcessedEventIdMap.get(profile).set(lastPersistedNotificationID);
                    }
                  }
                }
              }
            } catch (Exception failure) {
              // No break here: the PAUSED check at the top of the next iteration ends the loop.
              LOGGER.error(
                  "[{}]Received exception while persisting the notification ID = {}, the hms sync will be paused, Caused by {}",
                  profile,
                  event.getEventId(),
                  ExceptionUtils.getFullStackTrace(failure));
              SyncStateBank.enableState(profile, HMSSyncState.PAUSED);
              SyncStateBank.disableState(profile, HMSSyncState.INCREMENTAL_UPDATE_RUNNING);
            }
          }
        });
    hmsRecordLogger.end();
  }
 
  public void close() {
    syncHMSClientMap.forEach(
        (key, value) -> {
          try {
            value.disconnect();
            SyncStateBank.disableState(key, HMSSyncState.CONNECTED);
          } catch (Exception failure) {
            LOGGER.error("Failed to close the Ranger HMS Client", failure);
          }
        });
    notificationFetcher.close();
    notificationProcessor.close();
  }
 
  public void resetEventId(HmsSyncParam hmsSyncParam) {
    LOGGER.info("reset hms event id to [{}]", hmsSyncParam);
    SyncStateBank.enableState(hmsSyncParam.getProfile(), HMSSyncState.SKIP_CHANGE);
    lastProcessedEventIdMap.get(hmsSyncParam.getProfile()).set(hmsSyncParam.getEventId());
    if (hmsSyncParam.getEventId()
        > rangerSyncStore.getLastPersistedNotificationID(hmsSyncParam.getProfile())) {
      rangerSyncStore.persistLastProcessedNotificationID(
          hmsSyncParam.getProfile(), hmsSyncParam.getEventId(), 0);
    } else {
      LOGGER.info(
          "[{}]The reset event id is less than the persisted, only modify the memory value",
          hmsSyncParam);
    }
  }
 
  public void initEventIdAndInitStatus(String profile) {
    long lastPersistedNotificationID = rangerSyncStore.getLastPersistedNotificationID(profile);
    if (lastPersistedNotificationID > lastProcessedEventIdMap.get(profile).get()
        && !SyncStateBank.isEnabled(profile, HMSSyncState.SKIP_CHANGE)) {
      lastProcessedEventIdMap.get(profile).set(lastPersistedNotificationID);
    }
  }
 
  public long getLastProcessedEventIdCache(String profile) {
    return lastProcessedEventIdMap.get(profile).get();
  }
 
  public List<MNotificationEvent> fetchNotifications(String profile, long offset, int size)
      throws Exception {
    if (size > 0) {
      return notificationFetcher.fetchNotifications(profile, offset, size);
    } else {
      return notificationFetcher.fetchNotifications(profile, offset);
    }
  }
}
 
    
 
      /* Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.  You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.*/
package org.apache.ranger.sync.hms.service.thrift;
import static com.msxf.ranger.sync.hms.common.HMSSyncConstants.*;
import com.google.common.collect.ImmutableMap;import com.msxf.bdp.vp.VersionProxyService;import com.msxf.bdp.vp.VersionProxyServiceFactory;import com.msxf.ranger.sync.hms.common.model.PathsImage;import com.msxf.ranger.sync.hms.common.model.ProfileInfo;import com.msxf.ranger.sync.hms.common.model.message.IMessageDeserializer;import com.msxf.ranger.sync.hms.common.util.HmsSyncPropertiesUtil;import com.msxf.ranger.sync.hms.thrift.ISyncHMSClient;import com.msxf.ranger.sync.hms.thrift.MNotificationEvent;import java.util.Collection;import java.util.HashMap;import java.util.List;import java.util.Map;import java.util.Optional;import java.util.concurrent.ConcurrentHashMap;import java.util.concurrent.atomic.AtomicBoolean;import java.util.concurrent.atomic.AtomicLong;import org.apache.commons.lang.exception.ExceptionUtils;import org.apache.commons.lang3.StringUtils;import org.apache.ranger.biz.RangerBizUtil;import org.apache.ranger.biz.ServiceDBStore;import org.apache.ranger.common.PropertiesUtil;import org.apache.ranger.common.db.RangerTransactionSynchronizationAdapter;import org.apache.ranger.service.RangerPolicyService;import org.apache.ranger.sync.common.SyncConfiguration;import org.apache.ranger.sync.common.convert.HdfsPolicySyncHelper;import org.apache.ranger.sync.common.convert.SyncConvertHelper;import org.apache.ranger.sync.common.entity.HmsSyncParam;import org.apache.ranger.sync.common.exception.HmsServiceException;import org.apache.ranger.sync.common.persistent.RangerSyncStore;import org.apache.ranger.sync.common.state.SyncStateBank;import org.apache.ranger.sync.hms.common.exception.HmsSyncException;import org.apache.ranger.sync.hms.common.model.HMSSyncState;import org.apache.ranger.sync.hms.common.util.HmsRecordLogger;import org.apache.ranger.sync.hms.processor.NotificationProcessor;import org.eclipse.persistence.jpa.rs.exceptions.JPARSException;import org.slf4j.Logger;import org.slf4j.LoggerFactory;import 
org.springframework.beans.factory.annotation.Autowired;import org.springframework.stereotype.Component;
/** * HMSFollower follows the Hive MetaStore state changes from Ranger. It gets the full update and * notification logs from HMS and applies it to update permissions stored in Ranger using * rangerSyncStore and also update the &lt obj,path &gt state stored for HDFS-Ranger sync. */@Componentpublic class HMSFollower extends SyncConfiguration {
  private static final Logger LOGGER = LoggerFactory.getLogger(HMSFollower.class);  private final Map<String, ProfileInfo> profileInfoMap = new HashMap<>();  private final Map<String, ISyncHMSClient> syncHMSClientMap = new HashMap<>();
  public Map<String, ISyncHMSClient> getSyncHMSClientMap() {    return ImmutableMap.copyOf(syncHMSClientMap);  }
  private final NotificationFetcher notificationFetcher;  private final NotificationProcessor notificationProcessor;  private final Map<String, AtomicBoolean> fullUpdateHMSMap = new HashMap<>();  private final Map<String, AtomicLong> lastProcessedEventIdMap = new ConcurrentHashMap<>();
  @Autowired  public HMSFollower(      RangerTransactionSynchronizationAdapter txSyncAdapter,      RangerSyncStore rangerSyncStore,      ServiceDBStore rangerSvcStore,      RangerPolicyService rangerPolicyService,      RangerBizUtil rangerBizUtil,      SyncConvertHelper syncConvertHelper,      HdfsPolicySyncHelper hdfsPolicySyncHelper) {    LOGGER.info("HMSFollower is being initializing");
    // 公共配置    if (PropertiesUtil.getPropertiesMap().containsKey(RANGER_SYNC_METASTORE_URIS)) {      LOGGER.warn(          "{} is deprecated. Use {}", RANGER_SYNC_METASTORE_URIS, RANGER_SYNC_METASTORE_PROFILES);    }
    String[] profiles = PropertiesUtil.getPropertyStringList(RANGER_SYNC_METASTORE_PROFILES);    if (profiles.length == 0) {      throw new IllegalArgumentException("There is no configured profile");    }
    HmsSyncPropertiesUtil hmsSyncPropertiesUtil =        new HmsSyncPropertiesUtil(PropertiesUtil.getProps());
    VersionProxyService hmsClientProxyService;    VersionProxyService hmsDeserializerProxyService;    try {      String configPath = PropertiesUtil.getProperty(RANGER_SYNC_VERSION_PROXY_CONFIG_PATH);      if (StringUtils.isEmpty(configPath)) {        throw new IllegalArgumentException("Version proxy is no configured");      }            VersionProxyServiceFactory proxyServiceFactory =          VersionProxyServiceFactory.newBuilder()              .configLocation(configPath)              .build();      hmsClientProxyService = proxyServiceFactory.get(ISyncHMSClient.class);      hmsDeserializerProxyService = proxyServiceFactory.get(IMessageDeserializer.class);    } catch (Exception e) {      throw new RuntimeException("Version proxy service init failed", e);    }    for (String profile : profiles) {      SyncStateBank.disableState(profile, HMSSyncState.UNINITIALIZED);      SyncStateBank.enableState(profile, HMSSyncState.STARTING);      // 加载 profile 信息      String hmsVersion =          Optional.ofNullable(PropertiesUtil.getProperty(buildProfileHmsVersionKey(profile)))              .orElseThrow(                  () ->                      new IllegalArgumentException(                          "Profile[" + profile + "] has no mapping hms version"));      ProfileInfo profileInfo = new ProfileInfo(profile, hmsVersion, hmsDeserializerProxyService.createProxy(profile));      String[] catalogs =          PropertiesUtil.getPropertyStringList(              buildProfileCatalogsKey(profile), RANGER_SYNC_METASTORE_PROFILE_CATALOG_DEFAULT);      for (String catalog : catalogs) {        ProfileInfo.CatalogService catalogService = new ProfileInfo.CatalogService();        catalogService.setHiveServiceName(            Optional.ofNullable(                    PropertiesUtil.getProperty(buildHiveServiceNameKey(profile, catalog)))                .orElseThrow(                    () ->                        new IllegalArgumentException(                            "Catalog[" + catalog + "] 
has no mapping hive service")));        catalogService.setHdfsServiceName(            Optional.ofNullable(                    PropertiesUtil.getProperty(buildHdfsServiceNameKey(profile, catalog)))                .orElseThrow(                    () ->                        new IllegalArgumentException(                            "Catalog[" + catalog + "] has no mapping hdfs service")));        profileInfo.addCatalogServiceMapping(catalog, catalogService);      }      profileInfoMap.put(profile, profileInfo);
      try {
        lastProcessedEventIdMap.put(            profile, new AtomicLong(rangerSyncStore.getLastPersistedNotificationID(profile)));        fullUpdateHMSMap.put(profile, new AtomicBoolean(false));
        ISyncHMSClient hmsProxyServiceProxy = hmsClientProxyService.createProxy(profile);        hmsProxyServiceProxy.init(profile, hmsSyncPropertiesUtil, subject);        syncHMSClientMap.put(profile, hmsProxyServiceProxy);      } catch (Exception e) {        throw new RuntimeException(e);      }      SyncStateBank.enableState(profile, HMSSyncState.STARTED);    }
    hdfsPolicySyncHelper.profileInfoMap = profileInfoMap;    notificationFetcher = new NotificationFetcher(syncHMSClientMap);    notificationProcessor =        new NotificationProcessor(            txSyncAdapter,            rangerSvcStore,            rangerPolicyService,            rangerBizUtil,            syncConvertHelper,            hdfsPolicySyncHelper,            syncHMSClientMap);
    LOGGER.info("HMSFollower initialize successful");  }
  /**   * Processes new Hive Metastore notifications.   *   * <p>If no notifications are processed yet, then it does a full initial snapshot of the Hive   * Metastore followed by new notifications updates that could have happened after it.   */  public void syncUpWithHms(String profile) {    if (!initTx()) {      return;    }    // session 是和线程绑定的    if (!initSession()) {      LOGGER.info("Session initialize failed, skipping sync");      return;    }
    long notificationId = lastProcessedEventIdMap.get(profile).get();    if (SyncStateBank.isEnabled(profile, HMSSyncState.PAUSED)) {      LOGGER.info(          "[{}]HMS sync is suspend, event with notification id {} is delay",          profile,          notificationId);      return;    }
    try {      syncHMSClientMap.get(profile).connect();      SyncStateBank.enableState(profile, HMSSyncState.CONNECTED);    } catch (Throwable e) {      LOGGER.error("[" + profile + "]HMSFollower cannot connect to HMS!!", e);      return;    }
    try {      /* Before getting notifications, it checks if a full HMS snapshot is required. */      if (isFullSnapshotRequired(profile, notificationId)) {        if (PropertiesUtil.getBooleanProperty(            buildProfileInitialDeltaKey(profile), RANGER_SYNC_METASTORE_INITIAL_IS_DELTA_DEFAULT)) {          notificationId =              syncHMSClientMap.get(profile).getCurrentNotificationEventId()                  - 1; // 拉最新的一条,为了记录下event id          if (SyncStateBank.isEnabled(profile, HMSSyncState.FULL_UPDATE_RUNNING)) {            SyncStateBank.disableState(profile, HMSSyncState.FULL_UPDATE_RUNNING);          }        } else {          createFullSnapshot(profile);          return;        }      }      // 设置最大拉取500条      int pollMax = PropertiesUtil.getIntProperty(RANGER_SYNC_PULL_BATCH_MAX, 500);      List<MNotificationEvent> notifications =          notificationFetcher.fetchNotifications(profile, notificationId, pollMax);
      // After getting notifications, it checks if the HMS did some clean-up      // and notifications are out-of-sync with Ranger.      if (areNotificationsOutOfSync(profile, notifications, notificationId)) {        createFullSnapshot(profile);        return;      }
      // Continue with processing new notifications if no snapshots are done.      processNotifications(profile, notifications);    } catch (Throwable t) {      // catching errors to prevent the executor to halt.      LOGGER.error("Exception in HMSFollower! Caused by: " + t.getMessage(), t);      close();    }  }
  /**   * Checks if a new full HMS snapshot request is needed by checking if:   *   * <ul>   *   <li>Ranger HMS Notification table is EMPTY   *   <li>HDFSSync is enabled   *   <li>The current notification Id on the HMS is less than the latest processed by Ranger.   *   <li>Full Snapshot Signal is detected   * </ul>   *   * @param latestNotificationId The notification id to check against the HMS   * @return True if a full snapshot is required; False otherwise.   * @throws Exception If an error occurs while checking the rangerSyncStore or the HMS client.   */  private boolean isFullSnapshotRequired(String profile, long latestNotificationId)      throws Exception {    // 是空的说明肯定初始化,只能全量    if (rangerSyncStore.isHmsNotificationEmpty(profile)) {      LOGGER.debug(          "Ranger Store has no HMS Notifications. Create Full HMS Snapshot."              + "Latest sentry notification Id = {}",          latestNotificationId);      return true;    }
    long currentHmsNotificationId = notificationFetcher.getCurrentNotificationId(profile);    if (currentHmsNotificationId < latestNotificationId) {      LOGGER.info(          "The current notification ID on HMS = {} is less than the latest processed notification ID = {}. "              + "Need to request a full HMS snapshot",          currentHmsNotificationId,          latestNotificationId);      return true;    }
    // Check if forced full update is required, reset update flag to false.    // Only do it once per forced full update request.    if (fullUpdateHMSMap.get(profile).compareAndSet(true, false)) {      LOGGER.info("FULL UPDATE TRIGGER: initiating full HMS snapshot request");      return true;    }
    return false;  }
  /**   * Checks if the HMS and Ranger processed notifications are out-of-sync. This could happen because   * the HMS did some clean-up of old notifications and Ranger was not requesting notifications   * during that time.   *   * @param profile   * @param events All new notifications to check for an out-of-sync.   * @param latestProcessedId The latest notification processed by Ranger to check against the list   *     of notifications events.   * @return True if an out-of-sync is found; False otherwise.   */  private boolean areNotificationsOutOfSync(      String profile, Collection<MNotificationEvent> events, long latestProcessedId) {    if (events.isEmpty()) {      return false;    }
    /*     * If the sequence of notifications has a gap, then an out-of-sync might     * have happened due to the following issue:     *     * - HDFS sync was disabled or Ranger was shutdown for a time period longer than     * the HMS notification clean-up thread causing old notifications to be deleted.     *     * HMS notifications may contain both gaps in the sequence and duplicates     * (the same ID repeated more than once for different events).     *     * To accept duplicates (see NotificationFetcher for more info), then a gap is found     * if the 1st notification received is higher than the current ID processed + 1.     * i.e.     *   1st ID = 3, latest ID = 3 (duplicate found but no gap detected)     *   1st ID = 4, latest ID = 3 (consecutive ID found but no gap detected)     *   1st ID = 5, latest ID = 3 (a gap is detected)     */
    List<MNotificationEvent> eventList = (List<MNotificationEvent>) events;    long firstNotificationId = eventList.get(0).getEventId();
    if (firstNotificationId > (latestProcessedId + 1)) {      LOGGER.info(          "[{}]First HMS event id = {} is greater than latest Ranger processed id = {} + 1."              + "Need to request a full HMS snapshot.",          profile,          firstNotificationId,          latestProcessedId);      return true;    }
    return false;  }
  /**   * Request for full snapshot and persists it if there is no snapshot available in the sentry   * store. Also, wakes-up any waiting clients.   */  private void createFullSnapshot(String profile) {    LOGGER.info("[{}]Attempting to take full HMS snapshot", profile);    try {      // Set that the full update is running      if (!SyncStateBank.isEnabled(profile, HMSSyncState.FULL_UPDATE_RUNNING)) {        SyncStateBank.enableState(profile, HMSSyncState.FULL_UPDATE_RUNNING);      }
      PathsImage snapshotInfo = syncHMSClientMap.get(profile).getFullSnapshot();      if (snapshotInfo.getPathImage().isEmpty()) {        LOGGER.debug("Received empty path image from HMS while taking a full snapshot");        return;      }
      try {        LOGGER.info(            "[{}]Persisting full snapshot for notification Id = {}", profile, snapshotInfo.getId());        notificationProcessor.onFullPathsImage(snapshotInfo, profileInfoMap.get(profile));      } catch (Exception failure) {        LOGGER.error("[{}]Received exception while persisting HMS path full snapshot", profile);        throw failure;      }      // HMSFollower connected to HMS, and it finished full snapshot if that was required      // Log this message only once      if (rangerSyncStore.persistLastProcessedNotificationID(profile, snapshotInfo.getId(), 1)) {        lastProcessedEventIdMap.get(profile).set(snapshotInfo.getId());        LOGGER.info("[{}]Ranger HMS support is ready", profile);      } else {        SyncStateBank.enableState(profile, HMSSyncState.PAUSED);        LOGGER.error(            "[{}]Event id {} persist failed, HmsFollower set to pause",            profile,            snapshotInfo.getId());      }    } catch (Exception failure) {      LOGGER.error("[{}]Received exception while creating HMS path full snapshot", profile);      throw failure;    } finally {      if (SyncStateBank.isEnabled(profile, HMSSyncState.FULL_UPDATE_RUNNING)) {        SyncStateBank.disableState(profile, HMSSyncState.FULL_UPDATE_RUNNING);      }    }  }
  /**   * Process the collection of notifications and wake up any waiting clients. Also, persists the   * notification ID regardless of processing result.   *   * @param events list of event to be processed   */  public void processNotifications(String profile, List<MNotificationEvent> events) {    if (!SyncStateBank.isEnabled(profile, HMSSyncState.INCREMENTAL_UPDATE_RUNNING)) {      SyncStateBank.enableState(profile, HMSSyncState.INCREMENTAL_UPDATE_RUNNING);    }    if (events.isEmpty()) {      return;    }    HmsRecordLogger hmsRecordLogger = new HmsRecordLogger(profile, events);    hmsRecordLogger.start();    txExecute(        () -> {          boolean isNotificationProcessed;          for (int i = 0; i < events.size(); i++) {            MNotificationEvent event = events.get(i);            if (SyncStateBank.isEnabled(profile, HMSSyncState.PAUSED)) {              LOGGER.info("[{}]Hms sync is paused, skip the remaining events", profile);              break;            }            try {              isNotificationProcessed =                  notificationProcessor.processNotificationEvent(                      profileInfoMap.get(profile), event);            } catch (HmsSyncException e) {              SyncStateBank.enableState(profile, HMSSyncState.PAUSED);              SyncStateBank.disableState(profile, HMSSyncState.INCREMENTAL_UPDATE_RUNNING);              LOGGER.error(                  "[{}]Event id: {} processing exception, pause sync... 
",                  profile,                  event.getEventId(),                  e);              break;            } catch (HmsServiceException e) {              // TODO 服务类异常              SyncStateBank.enableState(profile, HMSSyncState.PAUSED);              SyncStateBank.disableState(profile, HMSSyncState.INCREMENTAL_UPDATE_RUNNING);              LOGGER.error(                  "[{}]Processing the notification with ID:{} failed!",                  profile,                  event.getEventId(),                  e);              break;            } catch (Exception e) {              if (e.getCause() instanceof JPARSException) {                LOGGER.info(                    "[{}]Received JDO Storage Exception, Could be because of processing duplicate notification",                    profile);                if (event.getEventId() <= rangerSyncStore.getLastPersistedNotificationID(profile)) {                  // Rest of the notifications need not be processed.                  LOGGER.error(                      "[{}]Received event with Id: {} which is smaller then the ID persisted in store",                      profile,                      event.getEventId());                  break;                }              }              SyncStateBank.enableState(profile, HMSSyncState.PAUSED);              SyncStateBank.disableState(profile, HMSSyncState.INCREMENTAL_UPDATE_RUNNING);              LOGGER.error(                  "[{}]Processing the notification with ID:{} failed!",                  profile,                  event.getEventId(),                  e);              break;            }            if (!isNotificationProcessed) {              LOGGER.warn(                  "[{}]The listener encountered an issue while processing the data of HMSFollower (event ID {}), "                      + "and the remaining events will not be executed. 
"                      + "They will be processed in the next normal batch.",                  profile,                  event.getEventId());              break;            }            try {              if (isNotificationProcessed || i == events.size() - 1) {                if (rangerSyncStore.persistLastProcessedNotificationID(                        profile, event.getEventId(), 0)) {                  long expect = event.getEventId() - 1;                  if (!lastProcessedEventIdMap                          .get(profile)                          .compareAndSet(expect, event.getEventId())) {                    if (!SyncStateBank.isEnabled(profile, HMSSyncState.SKIP_CHANGE)) {                      long lastPersistedNotificationID =                              rangerSyncStore.getLastPersistedNotificationID(profile);                      LOGGER.warn(                              "[{}]The event id maintained by HmsFollower does not self increment "                                      + "and the state is not in skip. 
"                                      + "Expected value is [{}], current value is [{}], reset to [{}]",                              profile,                              expect,                              lastProcessedEventIdMap.get(profile).get(),                              lastPersistedNotificationID);                      lastProcessedEventIdMap.get(profile).set(lastPersistedNotificationID);                    }                  }                }              }            } catch (Exception failure) {              LOGGER.error(                  "[{}]Received exception while persisting the notification ID = {}, the hms sync will be paused, Caused by {}",                  profile,                  event.getEventId(),                  ExceptionUtils.getFullStackTrace(failure));              SyncStateBank.enableState(profile, HMSSyncState.PAUSED);              SyncStateBank.disableState(profile, HMSSyncState.INCREMENTAL_UPDATE_RUNNING);            }          }        });    hmsRecordLogger.end();  }
  public void close() {    syncHMSClientMap.forEach(        (key, value) -> {          try {            value.disconnect();            SyncStateBank.disableState(key, HMSSyncState.CONNECTED);          } catch (Exception failure) {            LOGGER.error("Failed to close the Ranger HMS Client", failure);          }        });    notificationFetcher.close();    notificationProcessor.close();  }
  public void resetEventId(HmsSyncParam hmsSyncParam) {    LOGGER.info("reset hms event id to [{}]", hmsSyncParam);    SyncStateBank.enableState(hmsSyncParam.getProfile(), HMSSyncState.SKIP_CHANGE);    lastProcessedEventIdMap.get(hmsSyncParam.getProfile()).set(hmsSyncParam.getEventId());    if (hmsSyncParam.getEventId()        > rangerSyncStore.getLastPersistedNotificationID(hmsSyncParam.getProfile())) {      rangerSyncStore.persistLastProcessedNotificationID(          hmsSyncParam.getProfile(), hmsSyncParam.getEventId(), 0);    } else {      LOGGER.info(          "[{}]The reset event id is less than the persisted, only modify the memory value",          hmsSyncParam);    }  }
  public void initEventIdAndInitStatus(String profile) {    long lastPersistedNotificationID = rangerSyncStore.getLastPersistedNotificationID(profile);    if (lastPersistedNotificationID > lastProcessedEventIdMap.get(profile).get()        && !SyncStateBank.isEnabled(profile, HMSSyncState.SKIP_CHANGE)) {      lastProcessedEventIdMap.get(profile).set(lastPersistedNotificationID);    }  }
  public long getLastProcessedEventIdCache(String profile) {    return lastProcessedEventIdMap.get(profile).get();  }
  public List<MNotificationEvent> fetchNotifications(String profile, long offset, int size)      throws Exception {    if (size > 0) {      return notificationFetcher.fetchNotifications(profile, offset, size);    } else {      return notificationFetcher.fetchNotifications(profile, offset);    }  }}
posted @ 2026-02-02 11:20  vincent128  阅读(0)  评论(0)    收藏  举报