Functions |
Pig commands |
SELECT |
FOREACH alias GENERATE column_name,column_name; |
SELECT * |
FOREACH alias GENERATE *; |
DISTINCT |
DISTINCT (FOREACH alias GENERATE column_name, column_name); |
WHERE |
FOREACH (FILTER alias BY column_name operator value) GENERATE column_name, column_name; |
AND/OR |
FILTER alias BY (column_name operator value1 AND column_name operator value2) OR column_name operator value3; |
ORDER BY |
ORDER alias BY column_name ASC|DESC,column_name ASC|DESC; |
TOP/LIMIT |
FOREACH (GROUP alias BY column_name) GENERATE LIMIT alias number; TOP(number, column_index, alias); |
GROUP BY |
FOREACH (GROUP alias BY column_name) GENERATE function(alias.column_name); |
LIKE |
FILTER alias BY REGEX_EXTRACT(column_name,pattern, 1) IS NOT NULL; |
IN |
FILTER alias BY column_name IN(value1, value2,…); |
JOIN |
FOREACH (JOIN alias1 BY column_name, alias2 BY column_name) GENERATE column_name(s); |
LEFT/RIGHT/FULL OUTER JOIN |
FOREACH (JOIN alias1 BY column_name LEFT|RIGHT|FULL, alias2 BY column_name) GENERATE column_name(s); |
UNION ALL |
UNION alias1, alias2; |
AVG |
FOREACH (GROUP alias ALL) GENERATE AVG(alias.column_name); |
COUNT |
FOREACH (GROUP alias ALL) GENERATE COUNT(alias); |
COUNT DISTINCT |
FOREACH alias { unique_column = DISTINCT column_name; GENERATE COUNT(unique_column); }; |
MAX |
FOREACH (GROUP alias ALL) GENERATE MAX(alias.column_name); |
MIN |
FOREACH (GROUP alias ALL) GENERATE MIN(alias.column_name); |
SUM |
FOREACH (GROUP alias ALL) GENERATE SUM(alias.column_name); |
HAVING |
FILTER alias BY aggregate_function(column_name) operator value; |
UCASE/UPPER |
FOREACH alias GENERATE UPPER(column_name); |
LCASE/LOWER |
FOREACH alias GENERATE LOWER(column_name); |
SUBSTRING |
FOREACH alias GENERATE SUBSTRING(column_name, start, start+length) AS some_name; |
LEN |
FOREACH alias GENERATE SIZE(column_name); |
ROUND |
FOREACH alias GENERATE ROUND(column_name); |