今天写pig脚本时,犯了个低级错误:在awk中使用了sub作为变量名(sub是awk的内置函数,不能用作变量名),结果执行pig脚本总报错
2.txt文件有两列内容
256;005;006;578,005
005;006,007,259
007;598,007
功能要求:从第一列中匹配第二列的内容,匹配到的输出
--*********************************************************************
-- Read 2.txt as two comma-separated chararray columns.
a = LOAD '2.txt' USING PigStorage(',') AS (c1:chararray,c2:chararray);
-- Keep only the rows whose second field occurs as a literal substring of the
-- first field; surviving rows are emitted as "<field1>\t<field2>".
b = STREAM a THROUGH `awk '
{
    haystack=$1;
    needle=$2;
    if(index(haystack,needle) == 0)
        next;
    print $1"\t"$2;
}'`;
DUMP b;
-- Read 2.txt as two comma-separated chararray columns.
a = LOAD '2.txt' USING PigStorage(',') AS (c1:chararray,c2:chararray);
-- Emit "<field1>\t<field2>" when the first field is exactly "NA" or when the
-- second field, used as a regular expression, matches the first field.
b = STREAM a THROUGH `awk '
{
    text=$1;
    pattern=$2;
    if(text != "NA" && !match(text,pattern))
        next;
    print $1"\t"$2;
}'`;
DUMP b;
遇到分号时,在pig stream awk中不能直接写";",要用它的ASCII码转义形式(\x3B)来表示
-- For every input row, split the second field into items and emit one
-- "<field1>\t<item prefix>" row per item, where the prefix is the part of the
-- item before its first "_".
-- The second field is split on "," when it contains a comma; otherwise it is
-- split on the semicolon, written as the escape \x3B (0x3B is the ASCII code
-- of the semicolon) instead of the literal character.
-- The loop bound is the element count returned by split(): calling length()
-- on an array is an extension that not every awk implementation accepts, and
-- the original re-evaluated it on every iteration.
g = STREAM f1 THROUGH `awk '
{
if(index($2,",") > 0)
{
n=split($2,arrjob,",");
}
else
{
n=split($2,arrjob,"\\x3B");
}
for(i=1;i<=n;i++)
{
split(arrjob[i],arrId,"_");
print $1"\t"arrId[1];
}
}'`;
正则匹配举例,$4必须是数字
-- Rewrite rows according to the fourth field, which must consist of digits only:
--   15 digits -> "<f1>\t<f2>\t<f3>\t1"
--   20 digits -> "<f1>\t<f2>\t<f3>\t<first 9 digits>_<remaining digits>"
-- Rows whose fourth field has any other length or contains a non-digit
-- produce no output.
b8 = STREAM b71 THROUGH `awk '
{
    out="";
    width=length($4);
    if((width == 15 || width == 20) && match($4,"^[0-9]+$"))
    {
        if(width == 15)
            out=$1"\t"$2"\t"$3"\t1";
        else
            out=$1"\t"$2"\t"$3"\t"substr($4,1,9)"_"substr($4,10);
    }
    if(length(out) > 0)
        print out;
}'`;
替换字符串中的"{}()"
-- Load every line of 1.txt as a single chararray column.
a = load '1.txt' as (data:chararray);
-- Remove every "{", "}", "(" and ")" from the first whitespace-separated field
-- of each row and print what is left.
-- A single bracket expression is used: inside [...] these characters are
-- literal, so no backslash escapes are needed (the original "\)" and "\(" are
-- not valid escapes in an awk string literal), and unlike the original
-- "( ... )*" form the pattern can never match the empty string.
b = stream a through `awk '
{
u=$1;
gsub("[{}()]","",u);
print u;
}'`;
dump b;
浙公网安备 33010602011771号