package com.hello.hbase;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;
import org.apache.flume.conf.ComponentConfiguration;
import org.apache.flume.sink.hbase.HbaseEventSerializer;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Row;
import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
/**
 * Flume {@link HbaseEventSerializer} that turns one log-line event into one HBase {@link Put}.
 *
 * <p>The event body is decoded with the configured charset, double quotes are removed, and the
 * line is split on single spaces. The field at index {@value #LOG_DATA_FIELD_INDEX} is split on
 * {@code &} into at least {@value #PARAM_COUNT} parameters, each of the form {@code name=value}.
 * The first four values are written, in order, under the first four configured column
 * qualifiers.
 *
 * <p>Instances hold per-event state ({@link #initialize(Event, byte[])} stores the body and
 * column family), so {@code initialize} must be called before {@link #getActions()}.
 */
public class FlumeHbaseEventSerializer implements HbaseEventSerializer {
  // Config vars

  /** Regular expression used to parse groups from event data. */
  public static final String REGEX_CONFIG = "regex";
  public static final String REGEX_DEFAULT = " ";

  /** Whether to ignore case when performing regex matches. */
  public static final String IGNORE_CASE_CONFIG = "regexIgnoreCase";
  public static final boolean IGNORE_CASE_DEFAULT = false;

  /**
   * Misspelled name of {@link #IGNORE_CASE_DEFAULT}, kept so existing references still compile.
   *
   * @deprecated use {@link #IGNORE_CASE_DEFAULT}
   */
  @Deprecated
  public static final boolean INGORE_CASE_DEFAULT = IGNORE_CASE_DEFAULT;

  /** Comma separated list of column names to place matching groups in. */
  public static final String COL_NAME_CONFIG = "colNames";
  public static final String COLUMN_NAME_DEFAULT = "ip";

  /**
   * Comma separated list of column names; when set and non-empty it takes precedence over
   * {@link #COL_NAME_CONFIG}.
   */
  public static final String COLUMNS_CONFIG = "columns";

  /** Index of the row key in matched regex groups. */
  public static final String ROW_KEY_INDEX_CONFIG = "rowKeyIndex";

  /** Placeholder in colNames for row key. */
  public static final String ROW_KEY_NAME = "ROW_KEY";

  /** Whether to deposit event headers into corresponding column qualifiers. */
  public static final String DEPOSIT_HEADERS_CONFIG = "depositHeaders";
  public static final boolean DEPOSIT_HEADERS_DEFAULT = false;

  /** What charset to use when serializing into HBase's byte arrays. */
  public static final String CHARSET_CONFIG = "charset";
  public static final String CHARSET_DEFAULT = "UTF-8";

  /** Position, in the space-separated log line, of the {@code &}-joined parameter string. */
  private static final int LOG_DATA_FIELD_INDEX = 4;

  /** Number of leading parameters (and column qualifiers) consumed per event. */
  private static final int PARAM_COUNT = 4;

  /**
   * Counter appended to every row key so that two events with the same key prefix, serialized
   * within this JVM, do not produce the same row key.
   */
  protected static final AtomicInteger nonce = new AtomicInteger(0);

  /** Random token generated once per class load; not read anywhere in this class. */
  protected static String randomKey = RandomStringUtils.randomAlphanumeric(10);

  /** Column family of the event currently being serialized. */
  protected byte[] cf;

  /** Raw body of the event currently being serialized. */
  private byte[] payload;

  /** Column qualifiers, encoded with {@link #charset}, in configuration order. */
  private List<byte[]> colNames = Lists.newArrayList();

  private boolean regexIgnoreCase;
  private Charset charset;

  /**
   * Reads the serializer settings from the sink context.
   *
   * <p>Column qualifiers come from {@code columns} when that key is set and non-empty, otherwise
   * from {@code colNames} (default {@code "ip"}). Calling this method again replaces the
   * previously configured qualifiers instead of appending to them.
   *
   * @param context sink configuration
   * @throws java.util.regex.PatternSyntaxException if the configured regex is invalid
   * @throws IllegalArgumentException if the configured charset name is illegal or unsupported
   */
  @Override
  public void configure(Context context) {
    String regex = context.getString(REGEX_CONFIG, REGEX_DEFAULT);
    regexIgnoreCase = context.getBoolean(IGNORE_CASE_CONFIG, IGNORE_CASE_DEFAULT);
    // The compiled pattern is not used for parsing; compiling only rejects an invalid regex
    // at configuration time.
    Pattern.compile(regex, Pattern.DOTALL | (regexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));

    charset = Charset.forName(context.getString(CHARSET_CONFIG, CHARSET_DEFAULT));

    // getString(key) yields null for an absent key, so it must not be wrapped in new String(...).
    String columns = context.getString(COLUMNS_CONFIG);
    String colNameStr;
    if (columns != null && !columns.isEmpty()) {
      colNameStr = columns;
    } else {
      colNameStr = context.getString(COL_NAME_CONFIG, COLUMN_NAME_DEFAULT);
    }

    colNames.clear();
    for (String name : colNameStr.split(",")) {
      colNames.add(name.getBytes(charset));
    }
  }

  /** No component-level configuration is used. */
  @Override
  public void configure(ComponentConfiguration conf) {}

  /**
   * Stores the event body and target column family for the next {@link #getActions()} call.
   *
   * @param event event whose body will be serialized
   * @param columnFamily column family the resulting {@link Put} writes to
   */
  @Override
  public void initialize(Event event, byte[] columnFamily) {
    this.payload = event.getBody();
    this.cf = columnFamily;
  }

  /**
   * Builds the row key for the current event: the fourth raw log parameter, a dash, and the
   * next value of {@link #nonce}.
   *
   * <p>NOTE(review): the prefix is the whole {@code name=value} parameter (for example
   * {@code ip=1.2.3.4-0}), not just the value that {@link #getActions()} stores. This is the
   * existing key format and is kept as is; confirm whether it is intended.
   *
   * @param cal not used by this implementation
   * @return the row key encoded with the configured charset
   * @throws FlumeException if the event body does not have the expected layout
   */
  protected byte[] getRowKey(Calendar cal) {
    String[] params = splitLogParams();
    String rowKey = params[PARAM_COUNT - 1] + "-" + nonce.getAndIncrement();
    return rowKey.getBytes(charset);
  }

  /**
   * Builds the row key for the current event.
   *
   * @return the row key encoded with the configured charset
   * @throws FlumeException if the event body does not have the expected layout
   */
  protected byte[] getRowKey() {
    return getRowKey(Calendar.getInstance());
  }

  /**
   * Produces a single {@link Put} holding the first four parameter values of the current event.
   *
   * @return a list containing exactly one {@link Put}
   * @throws FlumeException if fewer than four column qualifiers are configured, if the event
   *     body does not have the expected layout, or if the {@link Put} cannot be built
   */
  @Override
  public List<Row> getActions() throws FlumeException {
    if (colNames.size() < PARAM_COUNT) {
      throw new FlumeException(
          "Expected at least " + PARAM_COUNT + " column names but " + colNames.size()
              + " are configured");
    }

    // Extract every value before asking for the row key, so a malformed event does not
    // consume a nonce.
    String[] params = splitLogParams();
    byte[][] values = new byte[PARAM_COUNT][];
    for (int i = 0; i < PARAM_COUNT; i++) {
      values[i] = paramValue(params[i]).getBytes(charset);
    }

    List<Row> actions = Lists.newArrayList();
    try {
      Put put = new Put(getRowKey());
      for (int i = 0; i < PARAM_COUNT; i++) {
        put.add(cf, colNames.get(i), values[i]);
      }
      actions.add(put);
    } catch (FlumeException e) {
      throw e;
    } catch (RuntimeException e) {
      throw new FlumeException("Could not build HBase Put for event", e);
    }
    return actions;
  }

  /** This serializer never emits increments. */
  @Override
  public List<Increment> getIncrements() {
    return Lists.newArrayList();
  }

  /** Nothing to release. */
  @Override
  public void close() {}

  /**
   * Converts a timestamp such as {@code 10/Oct/2000:13:55:36} to {@code 2000-10-10-13:55:36}.
   *
   * <p>The input is parsed with a 24-hour clock; a 12-hour pattern would turn hour {@code 12}
   * into {@code 00}.
   *
   * @param dataStr timestamp in {@code dd/MMM/yyyy:HH:mm:ss} form with English month names
   * @return the timestamp formatted as {@code yyyy-MM-dd-HH:mm:ss}
   * @throws IllegalArgumentException if {@code dataStr} is null or cannot be parsed
   */
  public static String getDate2Str(String dataStr) {
    if (dataStr == null) {
      throw new IllegalArgumentException("Log timestamp must not be null");
    }
    // SimpleDateFormat is not thread-safe, so instances are created per call.
    SimpleDateFormat parser = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss", Locale.ENGLISH);
    SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd-HH:mm:ss");
    try {
      Date date = parser.parse(dataStr);
      return formatter.format(date);
    } catch (ParseException e) {
      throw new IllegalArgumentException("Unparseable log timestamp: " + dataStr, e);
    }
  }

  /** Splits the current event body into its raw {@code name=value} parameters, validated. */
  private String[] splitLogParams() {
    if (payload == null) {
      throw new FlumeException("Event body is missing; initialize() must be called first");
    }
    String line = new String(payload, charset).replace("\"", "");
    String[] fields = line.split(" ");
    if (fields.length <= LOG_DATA_FIELD_INDEX) {
      throw new FlumeException(
          "Malformed event: expected more than " + LOG_DATA_FIELD_INDEX
              + " space-separated fields but found " + fields.length);
    }
    String[] params = fields[LOG_DATA_FIELD_INDEX].split("&");
    if (params.length < PARAM_COUNT) {
      throw new FlumeException(
          "Malformed event: expected at least " + PARAM_COUNT
              + " '&'-separated parameters but found " + params.length);
    }
    return params;
  }

  /** Returns the value part of a {@code name=value} parameter. */
  private static String paramValue(String param) {
    String[] pair = param.split("=");
    if (pair.length < 2) {
      throw new FlumeException("Malformed event: parameter has no value: " + param);
    }
    return pair[1];
  }
}