standard aggs required_capability: categorize_v6 FROM sample_data | STATS count=COUNT(), sum=SUM(event_duration), avg=AVG(event_duration), count_distinct=COUNT_DISTINCT(event_duration) BY category=CATEGORIZE(message) | SORT count DESC, category ; count:long | sum:long | avg:double | count_distinct:long | category:keyword 3 | 7971589 | 2657196.3333333335 | 3 | .*?Connected.+?to.*? 3 | 14027356 | 4675785.333333333 | 3 | .*?Connection.+?error.*? 1 | 1232382 | 1232382.0 | 1 | .*?Disconnected.*? ; values aggs required_capability: categorize_v6 FROM sample_data | STATS values=MV_SORT(VALUES(message)), top=TOP(event_duration, 2, "DESC") BY category=CATEGORIZE(message) | SORT category ; values:keyword | top:long | category:keyword [Connected to 10.1.0.1, Connected to 10.1.0.2, Connected to 10.1.0.3] | [3450233, 2764889] | .*?Connected.+?to.*? [Connection error] | [8268153, 5033755] | .*?Connection.+?error.*? [Disconnected] | 1232382 | .*?Disconnected.*? ; mv required_capability: categorize_v6 FROM mv_sample_data | STATS COUNT(), SUM(event_duration) BY category=CATEGORIZE(message) | SORT category ; COUNT():long | SUM(event_duration):long | category:keyword 7 | 23231327 | .*?Banana.*? 3 | 7971589 | .*?Connected.+?to.*? 3 | 14027356 | .*?Connection.+?error.*? 1 | 1232382 | .*?Disconnected.*? ; row mv required_capability: categorize_v6 ROW message = ["connected to a", "connected to b", "disconnected"], str = ["a", "b", "c"] | STATS COUNT(), VALUES(str) BY category=CATEGORIZE(message) | SORT category ; COUNT():long | VALUES(str):keyword | category:keyword 2 | [a, b, c] | .*?connected.+?to.*? 1 | [a, b, c] | .*?disconnected.*? ; limit before stats required_capability: categorize_v6 FROM sample_data | SORT message | LIMIT 4 | STATS count=COUNT() BY category=CATEGORIZE(message) | SORT category ; count:long | category:keyword 3 | .*?Connected.+?to.*? 1 | .*?Connection.+?error.*? ; skips stopwords required_capability: categorize_v6 ROW message = ["Mon Tue connected to a", "Jul Aug connected to b September ", "UTC connected GMT to c UTC"] | STATS COUNT() BY category=CATEGORIZE(message) | SORT category ; COUNT():long | category:keyword 3 | .*?connected.+?to.*? ; with multiple indices required_capability: categorize_v6 required_capability: union_types FROM sample_data* | STATS COUNT() BY category=CATEGORIZE(message) | SORT category ; COUNT():long | category:keyword 12 | .*?Connected.+?to.*? 12 | .*?Connection.+?error.*? 4 | .*?Disconnected.*? ; mv with many values required_capability: categorize_v6 FROM employees | STATS COUNT() BY category=CATEGORIZE(job_positions) | SORT category | LIMIT 5 ; COUNT():long | category:keyword 18 | .*?Accountant.*? 13 | .*?Architect.*? 11 | .*?Business.+?Analyst.*? 13 | .*?Data.+?Scientist.*? 10 | .*?Head.+?Human.+?Resources.*? ; mv with many values and SUM required_capability: categorize_v6 FROM employees | STATS SUM(languages) BY category=CATEGORIZE(job_positions) | SORT category | LIMIT 3 ; SUM(languages):long | category:keyword 43 | .*?Accountant.*? 46 | .*?Architect.*? 35 | .*?Business.+?Analyst.*? ; mv with many values and nulls and SUM required_capability: categorize_v6 FROM employees | STATS SUM(languages) BY category=CATEGORIZE(job_positions) | SORT category DESC | LIMIT 2 ; SUM(languages):long | category:keyword 27 | null 46 | .*?Tech.+?Lead.*? ; mv via eval required_capability: categorize_v6 FROM sample_data | EVAL message = MV_APPEND(message, "Banana") | STATS COUNT() BY category=CATEGORIZE(message) | SORT category ; COUNT():long | category:keyword 7 | .*?Banana.*? 3 | .*?Connected.+?to.*? 3 | .*?Connection.+?error.*? 1 | .*?Disconnected.*? ; mv via eval const required_capability: categorize_v6 FROM sample_data | EVAL message = ["Banana", "Bread"] | STATS COUNT() BY category=CATEGORIZE(message) | SORT category ; COUNT():long | category:keyword 7 | .*?Banana.*? 7 | .*?Bread.*? ; mv via eval const without aliases required_capability: categorize_v6 FROM sample_data | EVAL message = ["Banana", "Bread"] | STATS COUNT() BY CATEGORIZE(message) | SORT `CATEGORIZE(message)` ; COUNT():long | CATEGORIZE(message):keyword 7 | .*?Banana.*? 7 | .*?Bread.*? ; mv const in parameter required_capability: categorize_v6 FROM sample_data | STATS COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) | SORT c ; COUNT():long | c:keyword 7 | .*?Banana.*? 7 | .*?Bread.*? ; agg alias shadowing required_capability: categorize_v6 FROM sample_data | STATS c = COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) | SORT c ; warning:Line 2:9: Field 'c' shadowed by field at line 2:24 c:keyword .*?Banana.*? .*?Bread.*? ; chained aggregations using categorize required_capability: categorize_v6 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) | STATS COUNT() BY category=CATEGORIZE(category) | SORT category ; COUNT():long | category:keyword 1 | .*?Connected.+?to.*? 1 | .*?Connection.+?error.*? 1 | .*?Disconnected.*? ; stats without aggs required_capability: categorize_v6 FROM sample_data | STATS BY category=CATEGORIZE(message) | SORT category ; category:keyword .*?Connected.+?to.*? .*?Connection.+?error.*? .*?Disconnected.*? ; text field required_capability: categorize_v6 FROM hosts | STATS COUNT() BY category=CATEGORIZE(host_group) | SORT category ; COUNT():long | category:keyword 2 | .*?DB.+?servers.*? 2 | .*?Gateway.+?instances.*? 5 | .*?Kubernetes.+?cluster.*? 1 | null // Note: DB is removed from "DB servers", because the ml_standard // tokenizer drops numbers, including hexadecimal ones. ; on TO_UPPER required_capability: categorize_v6 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(TO_UPPER(message)) | SORT category ; COUNT():long | category:keyword 3 | .*?CONNECTED.+?TO.*? 3 | .*?CONNECTION.+?ERROR.*? 1 | .*?DISCONNECTED.*? ; on CONCAT required_capability: categorize_v6 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " banana")) | SORT category ; COUNT():long | category:keyword 3 | .*?Connected.+?to.+?banana.*? 3 | .*?Connection.+?error.+?banana.*? 1 | .*?Disconnected.+?banana.*? ; on CONCAT with unicode required_capability: categorize_v6 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " 👍🏽😊")) | SORT category ; COUNT():long | category:keyword 3 | .*?Connected.+?to.+?👍🏽.+?😊.*? 3 | .*?Connection.+?error.+?👍🏽.+?😊.*? 1 | .*?Disconnected.+?👍🏽.+?😊.*? ; on REVERSE(CONCAT()) required_capability: categorize_v6 required_capability: fn_reverse_grapheme_clusters FROM sample_data | STATS COUNT() BY category=CATEGORIZE(REVERSE(CONCAT(message, " 👍🏽😊"))) | SORT category ; COUNT():long | category:keyword 1 | .*?😊.+?👍🏽.+?detcennocsiD.*? 3 | .*?😊.+?👍🏽.+?ot.+?detcennoC.*? 3 | .*?😊.+?👍🏽.+?rorre.+?noitcennoC.*? ; and then TO_LOWER required_capability: categorize_v6 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) | EVAL category=TO_LOWER(category) | SORT category ; COUNT():long | category:keyword 3 | .*?connected.+?to.*? 3 | .*?connection.+?error.*? 1 | .*?disconnected.*? ; on const empty string required_capability: categorize_v6 FROM sample_data | STATS COUNT() BY category=CATEGORIZE("") | SORT category ; COUNT():long | category:keyword 7 | null ; on const empty string from eval required_capability: categorize_v6 FROM sample_data | EVAL x = "" | STATS COUNT() BY category=CATEGORIZE(x) | SORT category ; COUNT():long | category:keyword 7 | null ; on null required_capability: categorize_v6 FROM sample_data | EVAL x = null | STATS COUNT(), SUM(event_duration) BY category=CATEGORIZE(x) | SORT category ; COUNT():long | SUM(event_duration):long | category:keyword 7 | 23231327 | null ; on null string required_capability: categorize_v6 FROM sample_data | EVAL x = null::string | STATS COUNT() BY category=CATEGORIZE(x) | SORT category ; COUNT():long | category:keyword 7 | null ; on const null required_capability: categorize_v6 FROM sample_data | STATS COUNT(), SUM(event_duration) BY category=CATEGORIZE(null) | SORT category ; COUNT():long | SUM(event_duration):long | category:keyword 7 | 23231327 | null ; on null row required_capability: categorize_v6 ROW message = null, str = ["a", "b", "c"] | STATS COUNT(), VALUES(str) BY category=CATEGORIZE(message) ; COUNT():long | VALUES(str):keyword | category:keyword 1 | [a, b, c] | null ; filtering out all data required_capability: categorize_v6 FROM sample_data | WHERE @timestamp < "2023-10-23T00:00:00Z" | STATS COUNT() BY category=CATEGORIZE(message) | SORT category ; COUNT():long | category:keyword ; filtering out all data with constant required_capability: categorize_v6 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) | WHERE false ; COUNT():long | category:keyword ; drop output columns required_capability: categorize_v6 FROM sample_data | STATS count=COUNT() BY category=CATEGORIZE(message) | EVAL x=1 | DROP count, category ; x:integer 1 1 1 ; category value processing required_capability: categorize_v6 ROW message = ["connected to a", "connected to b", "disconnected"] | STATS COUNT() BY category=CATEGORIZE(message) | EVAL category = TO_UPPER(category) | SORT category ; COUNT():long | category:keyword 2 | .*?CONNECTED.+?TO.*? 1 | .*?DISCONNECTED.*? ; row aliases required_capability: categorize_v6 ROW message = "connected to xyz" | EVAL x = message | STATS COUNT() BY category=CATEGORIZE(x) | EVAL y = category | SORT y ; COUNT():long | category:keyword | y:keyword 1 | .*?connected.+?to.+?xyz.*? | .*?connected.+?to.+?xyz.*? ; from aliases required_capability: categorize_v6 FROM sample_data | EVAL x = message | STATS COUNT() BY category=CATEGORIZE(x) | EVAL y = category | SORT y ; COUNT():long | category:keyword | y:keyword 3 | .*?Connected.+?to.*? | .*?Connected.+?to.*? 3 | .*?Connection.+?error.*? | .*?Connection.+?error.*? 1 | .*?Disconnected.*? | .*?Disconnected.*? ; row aliases with keep required_capability: categorize_v6 ROW message = "connected to xyz" | EVAL x = message | KEEP x | STATS COUNT() BY category=CATEGORIZE(x) | EVAL y = category | KEEP `COUNT()`, y | SORT y ; COUNT():long | y:keyword 1 | .*?connected.+?to.+?xyz.*? ; from aliases with keep required_capability: categorize_v6 FROM sample_data | EVAL x = message | KEEP x | STATS COUNT() BY category=CATEGORIZE(x) | EVAL y = category | KEEP `COUNT()`, y | SORT y ; COUNT():long | y:keyword 3 | .*?Connected.+?to.*? 3 | .*?Connection.+?error.*? 1 | .*?Disconnected.*? ; row rename required_capability: categorize_v6 ROW message = "connected to xyz" | RENAME message as x | STATS COUNT() BY category=CATEGORIZE(x) | RENAME category as y | SORT y ; COUNT():long | y:keyword 1 | .*?connected.+?to.+?xyz.*? ; from rename required_capability: categorize_v6 FROM sample_data | RENAME message as x | STATS COUNT() BY category=CATEGORIZE(x) | RENAME category as y | SORT y ; COUNT():long | y:keyword 3 | .*?Connected.+?to.*? 3 | .*?Connection.+?error.*? 1 | .*?Disconnected.*? ; row drop required_capability: categorize_v6 ROW message = "connected to a" | STATS c = COUNT() BY category=CATEGORIZE(message) | DROP category | SORT c ; c:long 1 ; from drop required_capability: categorize_v6 FROM sample_data | STATS c = COUNT() BY category=CATEGORIZE(message) | DROP category | SORT c ; c:long 1 3 3 ; reuse categorize arg expression in agg required_capability: categorize_v6 FROM sample_data | STATS m = MAX(LENGTH(CONCAT(message, "_end"))) BY c = CATEGORIZE(CONCAT(message, "_end")) | SORT m ; m:integer |c:keyword 16 |.*?Disconnected_end.*? 20 |.*?Connection.+?error_end.*? 25 |.*?Connected.+?to.*? ; categorize in aggs inside function required_capability: categorize_v6 FROM sample_data | STATS COUNT(), x = MV_APPEND(category, category) BY category=CATEGORIZE(message) | SORT x | KEEP `COUNT()`, x ; COUNT():long | x:keyword 3 | [.*?Connected.+?to.*?,.*?Connected.+?to.*?] 3 | [.*?Connection.+?error.*?,.*?Connection.+?error.*?] 1 | [.*?Disconnected.*?,.*?Disconnected.*?] ; categorize in aggs same as grouping inside function required_capability: categorize_v6 FROM sample_data | STATS COUNT(), x = MV_APPEND(CATEGORIZE(message), `CATEGORIZE(message)`) BY CATEGORIZE(message) | SORT x | KEEP `COUNT()`, x ; COUNT():long | x:keyword 3 | [.*?Connected.+?to.*?,.*?Connected.+?to.*?] 3 | [.*?Connection.+?error.*?,.*?Connection.+?error.*?] 1 | [.*?Disconnected.*?,.*?Disconnected.*?] ; categorize in aggs same as grouping inside function with explicit alias required_capability: categorize_v6 FROM sample_data | STATS COUNT(), x = MV_APPEND(CATEGORIZE(message), category) BY category=CATEGORIZE(message) | SORT x | KEEP `COUNT()`, x ; COUNT():long | x:keyword 3 | [.*?Connected.+?to.*?,.*?Connected.+?to.*?] 3 | [.*?Connection.+?error.*?,.*?Connection.+?error.*?] 1 | [.*?Disconnected.*?,.*?Disconnected.*?] ; multiple groupings with categorize and ip required_capability: categorize_multiple_groupings FROM sample_data | STATS count=COUNT() BY category=CATEGORIZE(message), client_ip | SORT category, client_ip ; count:long | category:keyword | client_ip:ip 1 | .*?Connected.+?to.*? | 172.21.2.113 1 | .*?Connected.+?to.*? | 172.21.2.162 1 | .*?Connected.+?to.*? | 172.21.3.15 3 | .*?Connection.+?error.*? | 172.21.3.15 1 | .*?Disconnected.*? | 172.21.0.5 ; multiple groupings with categorize and bucketed timestamp required_capability: categorize_multiple_groupings FROM sample_data | STATS count=COUNT() BY category=CATEGORIZE(message), timestamp=BUCKET(@timestamp, 1 HOUR) | SORT category, timestamp ; count:long | category:keyword | timestamp:datetime 2 | .*?Connected.+?to.*? | 2023-10-23T12:00:00.000Z 1 | .*?Connected.+?to.*? | 2023-10-23T13:00:00.000Z 3 | .*?Connection.+?error.*? | 2023-10-23T13:00:00.000Z 1 | .*?Disconnected.*? | 2023-10-23T13:00:00.000Z ; multiple groupings with categorize and limit before stats required_capability: categorize_multiple_groupings FROM sample_data | SORT message | LIMIT 5 | STATS count=COUNT() BY category=CATEGORIZE(message), client_ip | SORT category, client_ip ; count:long | category:keyword | client_ip:ip 1 | .*?Connected.+?to.*? | 172.21.2.113 1 | .*?Connected.+?to.*? | 172.21.2.162 1 | .*?Connected.+?to.*? | 172.21.3.15 2 | .*?Connection.+?error.*? | 172.21.3.15 ; multiple groupings with categorize and nulls required_capability: categorize_multiple_groupings FROM employees | STATS SUM(languages) BY category=CATEGORIZE(job_positions), gender | SORT category DESC, gender ASC | LIMIT 5 ; SUM(languages):long | category:keyword | gender:keyword 11 | null | F 16 | null | M 14 | .*?Tech.+?Lead.*? | F 23 | .*?Tech.+?Lead.*? | M 9 | .*?Tech.+?Lead.*? | null ; multiple groupings with categorize and a field that's always null required_capability: categorize_multiple_groupings FROM sample_data | EVAL nullfield = null | STATS count=COUNT() BY category=CATEGORIZE(nullfield), client_ip | SORT client_ip ; count:long | category:keyword | client_ip:ip 1 | null | 172.21.0.5 1 | null | 172.21.2.113 1 | null | 172.21.2.162 4 | null | 172.21.3.15 ; multiple groupings with categorize and the same text field required_capability: categorize_multiple_groupings FROM sample_data | STATS count=COUNT() BY category=CATEGORIZE(message), message | SORT message ; count:long | category:keyword | message:keyword 1 | .*?Connected.+?to.*? | Connected to 10.1.0.1 1 | .*?Connected.+?to.*? | Connected to 10.1.0.2 1 | .*?Connected.+?to.*? | Connected to 10.1.0.3 3 | .*?Connection.+?error.*? | Connection error 1 | .*?Disconnected.*? | Disconnected ; multiple additional complex groupings with categorize required_capability: categorize_multiple_groupings FROM sample_data | STATS count=COUNT(), duration=SUM(event_duration) BY category=CATEGORIZE(message), SUBSTRING(message, 1, 7), ip_part=TO_LONG(SUBSTRING(TO_STRING(client_ip), 8, 1)), hour=BUCKET(@timestamp, 1 HOUR) | SORT ip_part, category ; count:long | duration:long | category:keyword | SUBSTRING(message, 1, 7):keyword | ip_part:long | hour:datetime 1 | 1232382 | .*?Disconnected.*? | Disconn | 0 | 2023-10-23T13:00:00.000Z 2 | 6215122 | .*?Connected.+?to.*? | Connect | 2 | 2023-10-23T12:00:00.000Z 1 | 1756467 | .*?Connected.+?to.*? | Connect | 3 | 2023-10-23T13:00:00.000Z 3 | 14027356 | .*?Connection.+?error.*? | Connect | 3 | 2023-10-23T13:00:00.000Z ; multiple groupings with categorize and some constants including null required_capability: categorize_multiple_groupings FROM sample_data | STATS count=MV_COUNT(VALUES(message)) BY category=CATEGORIZE(message), null, constant="constant" | SORT category ; count:integer | category:keyword | null:null | constant:keyword 3 | .*?Connected.+?to.*? | null | constant 1 | .*?Connection.+?error.*? | null | constant 1 | .*?Disconnected.*? | null | constant ; multiple groupings with categorize and aggregation filters required_capability: categorize_multiple_groupings FROM employees | STATS lang_low=AVG(languages) WHERE salary<=50000, lang_high=AVG(languages) WHERE salary>50000 BY category=CATEGORIZE(job_positions), gender | SORT category, gender | LIMIT 5 ; lang_low:double | lang_high:double | category:keyword | gender:keyword 2.0 | 5.0 | .*?Accountant.*? | F 3.0 | 2.5 | .*?Accountant.*? | M 5.0 | 2.0 | .*?Accountant.*? | null 3.0 | 3.25 | .*?Architect.*? | F 3.75 | null | .*?Architect.*? | M ; multiple groupings with categorize on null row required_capability: categorize_multiple_groupings ROW message = null, str = ["a", "b", "c"] | STATS COUNT(), VALUES(str) BY category=CATEGORIZE(message), str | SORT str ; COUNT():long | VALUES(str):keyword | category:keyword | str:keyword 1 | [a, b, c] | null | a 1 | [a, b, c] | null | b 1 | [a, b, c] | null | c ;