Skip to content

Commit 8581dba

Browse files
committed
[CALCITE-6388] PsTableFunction throws NumberFormatException when the 'user' column has spaces
1 parent 73846cc commit 8581dba

File tree

2 files changed

+231
-85
lines changed

2 files changed

+231
-85
lines changed

plus/src/main/java/org/apache/calcite/adapter/os/PsTableFunction.java

Lines changed: 158 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,16 @@
2626
import org.apache.calcite.sql.type.SqlTypeName;
2727
import org.apache.calcite.util.Util;
2828

29+
import com.google.common.annotations.VisibleForTesting;
2930
import com.google.common.collect.ImmutableList;
31+
import com.google.common.collect.ImmutableMap;
3032

3133
import org.checkerframework.checker.nullness.qual.Nullable;
3234

3335
import java.util.List;
3436
import java.util.regex.Matcher;
3537
import java.util.regex.Pattern;
38+
import java.util.stream.Collectors;
3639

3740
/**
3841
* Table function that executes the OS "ps" command
@@ -43,114 +46,184 @@ public class PsTableFunction {
4346
Pattern.compile("([0-9]+):([0-9]+):([0-9]+)");
4447
private static final Pattern HOUR_MINUTE_SECOND_PATTERN =
4548
Pattern.compile("([0-9]+):([0-9]+)\\.([0-9]+)");
49+
private static final Pattern NUMERIC_PATTERN = Pattern.compile("(\\d+)");
50+
51+
// it acts as a partial mapping, missing entries are the identity (e.g., "user" -> "user")
52+
private static final ImmutableMap<String, String> UNIX_TO_MAC_PS_FIELDS =
53+
ImmutableMap.<String, String>builder()
54+
.put("pgrp", "pgid")
55+
.put("start_time", "lstart")
56+
.put("euid", "uid")
57+
.build();
58+
59+
private static final List<String> PS_FIELD_NAMES =
60+
ImmutableList.of("user",
61+
"pid",
62+
"ppid",
63+
"pgrp",
64+
"tpgid",
65+
"stat",
66+
"pcpu",
67+
"pmem",
68+
"vsz",
69+
"rss",
70+
"tty",
71+
"start_time",
72+
"time",
73+
"euid",
74+
"ruid",
75+
"sess",
76+
"comm");
4677

4778
/**
 * Prevents instantiation: the constructor is private and throws
 * {@link AssertionError} if it is invoked anyway.
 */
private PsTableFunction() {
79+
throw new AssertionError("Utility class should not be instantiated");
80+
}
81+
82+
/**
83+
* Class for parsing, line by line, the output of the ps command for a
84+
* predefined list of parameters.
85+
*/
86+
@VisibleForTesting
87+
protected static class LineParser implements Function1<String, Object[]> {
88+
89+
@Override public Object[] apply(String line) {
90+
final String[] tokens = line.trim().split(" +");
91+
final Object[] values = new Object[PS_FIELD_NAMES.size()];
92+
93+
if (tokens.length < PS_FIELD_NAMES.size()) {
94+
throw new IllegalArgumentException(
95+
"Expected at least " + PS_FIELD_NAMES.size() + ", got " + tokens.length);
96+
}
97+
98+
int fieldIdx = 0;
99+
int processedTokens = 0;
100+
// more tokens than fields, either "user" or "comm" (or both) contain whitespaces, we assume
101+
// usernames don't have numeric parts separated by whitespaces (e.g., "root 123"), therefore
102+
// we stop whenever we find a numeric token assuming it's the "pid" and "user" is over
103+
if (tokens.length > PS_FIELD_NAMES.size()) {
104+
StringBuilder sb = new StringBuilder();
105+
for (String field : tokens) {
106+
if (NUMERIC_PATTERN.matcher(field).matches()) {
107+
break;
108+
}
109+
processedTokens++;
110+
sb.append(field).append(" ");
111+
}
112+
values[fieldIdx] =
113+
field(PS_FIELD_NAMES.get(fieldIdx), sb.deleteCharAt(sb.length() - 1).toString());
114+
fieldIdx++;
115+
}
116+
117+
for (; fieldIdx < values.length - 1; fieldIdx++) {
118+
try {
119+
values[fieldIdx] = field(PS_FIELD_NAMES.get(fieldIdx), tokens[processedTokens++]);
120+
} catch (RuntimeException e) {
121+
throw new RuntimeException("while parsing value ["
122+
+ tokens[fieldIdx] + "] of field [" + PS_FIELD_NAMES.get(fieldIdx)
123+
+ "] in line [" + line + "]");
124+
}
125+
}
126+
127+
// spaces also in the "comm" part
128+
if (processedTokens < tokens.length - 1) {
129+
StringBuilder sb = new StringBuilder();
130+
while (processedTokens < tokens.length) {
131+
sb.append(tokens[processedTokens++]).append(" ");
132+
}
133+
values[fieldIdx] =
134+
field(PS_FIELD_NAMES.get(fieldIdx), sb.deleteCharAt(sb.length() - 1).toString());
135+
} else {
136+
values[fieldIdx] = field(PS_FIELD_NAMES.get(fieldIdx), tokens[processedTokens]);
137+
}
138+
return values;
139+
}
140+
141+
private Object field(String field, String value) {
142+
switch (field) {
143+
case "pid":
144+
case "ppid":
145+
case "pgrp": // linux only; macOS equivalent is "pgid"
146+
case "pgid": // see "pgrp"
147+
case "tpgid":
148+
return Integer.valueOf(value);
149+
case "pcpu":
150+
case "pmem":
151+
return (int) (Float.parseFloat(value) * 10f);
152+
case "time":
153+
final Matcher m1 =
154+
MINUTE_SECOND_MILLIS_PATTERN.matcher(value);
155+
if (m1.matches()) {
156+
final long h = Long.parseLong(m1.group(1));
157+
final long m = Long.parseLong(m1.group(2));
158+
final long s = Long.parseLong(m1.group(3));
159+
return h * 3600000L + m * 60000L + s * 1000L;
160+
}
161+
final Matcher m2 =
162+
HOUR_MINUTE_SECOND_PATTERN.matcher(value);
163+
if (m2.matches()) {
164+
final long m = Long.parseLong(m2.group(1));
165+
final long s = Long.parseLong(m2.group(2));
166+
StringBuilder g3 = new StringBuilder(m2.group(3));
167+
while (g3.length() < 3) {
168+
g3.append("0");
169+
}
170+
final long millis = Long.parseLong(g3.toString());
171+
return m * 60000L + s * 1000L + millis;
172+
}
173+
return 0L;
174+
case "start_time": // linux only; macOS version is "lstart"
175+
case "lstart": // see "start_time"
176+
case "euid": // linux only; macOS equivalent is "uid"
177+
case "uid": // see "euid"
178+
default:
179+
return value;
180+
}
181+
}
48182
}
49183

50184
public static ScannableTable eval(boolean b) {
51185
return new AbstractBaseScannableTable() {
52186
@Override public Enumerable<@Nullable Object[]> scan(DataContext root) {
53187
final RelDataType rowType = getRowType(root.getTypeFactory());
54-
final List<String> fieldNames =
55-
ImmutableList.copyOf(rowType.getFieldNames());
188+
final List<String> fieldNames = ImmutableList.copyOf(rowType.getFieldNames());
56189
final String[] args;
57190
final String osName = System.getProperty("os.name");
58191
final String osVersion = System.getProperty("os.version");
59192
Util.discard(osVersion);
60193
switch (osName) {
61194
case "Mac OS X": // tested on version 10.12.5
62195
args = new String[] {
63-
"ps", "ax", "-o", "ppid=,pid=,pgid=,tpgid=,stat=,"
64-
+ "user=,pcpu=,pmem=,vsz=,rss=,tty=,start=,time=,uid=,ruid=,"
65-
+ "sess=,comm="};
196+
"ps", "ax", "-o",
197+
fieldNames.stream()
198+
.map(s -> UNIX_TO_MAC_PS_FIELDS.getOrDefault(s, s) + "=")
199+
.collect(Collectors.joining(","))};
66200
break;
67201
default:
68202
args = new String[] {
69-
"ps", "--no-headers", "axo", "ppid,pid,pgrp,"
70-
+ "tpgid,stat,user,pcpu,pmem,vsz,rss,tty,start_time,time,euid,"
71-
+ "ruid,sess,comm"};
203+
"ps", "--no-headers", "axo", String.join(",", fieldNames)};
72204
}
73-
return Processes.processLines(args)
74-
.select(
75-
new Function1<String, Object[]>() {
76-
@Override public Object[] apply(String line) {
77-
final String[] fields = line.trim().split(" +");
78-
final Object[] values = new Object[fieldNames.size()];
79-
for (int i = 0; i < values.length; i++) {
80-
try {
81-
values[i] = field(fieldNames.get(i), fields[i]);
82-
} catch (RuntimeException e) {
83-
throw new RuntimeException("while parsing value ["
84-
+ fields[i] + "] of field [" + fieldNames.get(i)
85-
+ "] in line [" + line + "]");
86-
}
87-
}
88-
return values;
89-
}
90-
91-
private Object field(String field, String value) {
92-
switch (field) {
93-
case "pid":
94-
case "ppid":
95-
case "pgrp": // linux only; macOS equivalent is "pgid"
96-
case "pgid": // see "pgrp"
97-
case "tpgid":
98-
return Integer.valueOf(value);
99-
case "pcpu":
100-
case "pmem":
101-
return (int) (Float.valueOf(value) * 10f);
102-
case "time":
103-
final Matcher m1 =
104-
MINUTE_SECOND_MILLIS_PATTERN.matcher(value);
105-
if (m1.matches()) {
106-
final long h = Long.parseLong(m1.group(1));
107-
final long m = Long.parseLong(m1.group(2));
108-
final long s = Long.parseLong(m1.group(3));
109-
return h * 3600000L + m * 60000L + s * 1000L;
110-
}
111-
final Matcher m2 =
112-
HOUR_MINUTE_SECOND_PATTERN.matcher(value);
113-
if (m2.matches()) {
114-
final long m = Long.parseLong(m2.group(1));
115-
final long s = Long.parseLong(m2.group(2));
116-
String g3 = m2.group(3);
117-
while (g3.length() < 3) {
118-
g3 = g3 + "0";
119-
}
120-
final long millis = Long.parseLong(g3);
121-
return m * 60000L + s * 1000L + millis;
122-
}
123-
return 0L;
124-
case "start_time": // linux only; macOS version is "lstart"
125-
case "lstart": // see "start_time"
126-
case "euid": // linux only; macOS equivalent is "uid"
127-
case "uid": // see "euid"
128-
default:
129-
return value;
130-
}
131-
}
132-
});
205+
return Processes.processLines(args).select(new LineParser());
133206
}
134207

135208
@Override public RelDataType getRowType(RelDataTypeFactory typeFactory) {
136209
return typeFactory.builder()
137-
.add("pid", SqlTypeName.INTEGER)
138-
.add("ppid", SqlTypeName.INTEGER)
139-
.add("pgrp", SqlTypeName.INTEGER)
140-
.add("tpgid", SqlTypeName.INTEGER)
141-
.add("stat", SqlTypeName.VARCHAR)
142-
.add("user", SqlTypeName.VARCHAR)
143-
.add("pcpu", SqlTypeName.DECIMAL, 3, 1)
144-
.add("pmem", SqlTypeName.DECIMAL, 3, 1)
145-
.add("vsz", SqlTypeName.INTEGER)
146-
.add("rss", SqlTypeName.INTEGER)
147-
.add("tty", SqlTypeName.VARCHAR)
148-
.add("start_time", SqlTypeName.VARCHAR)
149-
.add("time", TimeUnit.HOUR, -1, TimeUnit.SECOND, 0)
150-
.add("euid", SqlTypeName.VARCHAR)
151-
.add("ruid", SqlTypeName.VARCHAR)
152-
.add("sess", SqlTypeName.VARCHAR)
153-
.add("command", SqlTypeName.VARCHAR)
210+
.add(PS_FIELD_NAMES.get(0), SqlTypeName.VARCHAR)
211+
.add(PS_FIELD_NAMES.get(1), SqlTypeName.INTEGER)
212+
.add(PS_FIELD_NAMES.get(2), SqlTypeName.INTEGER)
213+
.add(PS_FIELD_NAMES.get(3), SqlTypeName.INTEGER)
214+
.add(PS_FIELD_NAMES.get(4), SqlTypeName.INTEGER)
215+
.add(PS_FIELD_NAMES.get(5), SqlTypeName.VARCHAR)
216+
.add(PS_FIELD_NAMES.get(6), SqlTypeName.DECIMAL, 3, 1)
217+
.add(PS_FIELD_NAMES.get(7), SqlTypeName.DECIMAL, 3, 1)
218+
.add(PS_FIELD_NAMES.get(8), SqlTypeName.INTEGER)
219+
.add(PS_FIELD_NAMES.get(9), SqlTypeName.INTEGER)
220+
.add(PS_FIELD_NAMES.get(10), SqlTypeName.VARCHAR)
221+
.add(PS_FIELD_NAMES.get(11), SqlTypeName.VARCHAR)
222+
.add(PS_FIELD_NAMES.get(12), TimeUnit.HOUR, -1, TimeUnit.SECOND, 0)
223+
.add(PS_FIELD_NAMES.get(13), SqlTypeName.VARCHAR)
224+
.add(PS_FIELD_NAMES.get(14), SqlTypeName.VARCHAR)
225+
.add(PS_FIELD_NAMES.get(15), SqlTypeName.VARCHAR)
226+
.add(PS_FIELD_NAMES.get(16), SqlTypeName.VARCHAR)
154227
.build();
155228
}
156229
};
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to you under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.calcite.adapter.os;
18+
19+
import com.google.common.collect.ImmutableList;
20+
21+
import org.junit.jupiter.api.Test;
22+
23+
import java.util.ArrayList;
24+
import java.util.Arrays;
25+
import java.util.HashMap;
26+
import java.util.List;
27+
import java.util.Map;
28+
29+
import static org.hamcrest.CoreMatchers.is;
30+
import static org.hamcrest.MatcherAssert.assertThat;
31+
32+
/**
33+
* Unit tests for the ps (process status) table function.
34+
*/
35+
class PsTableFunctionTest {
36+
37+
/** Test case for
38+
* <a href="https://issues.apache.org/jira/browse/CALCITE-6388">[CALCITE-6388]
39+
* PsTableFunction throws NumberFormatException when the 'user' column has spaces</a>.
40+
*/
41+
@Test void testPsInfoParsing() {
42+
final List<String> input = new ArrayList<>();
43+
input.add("startup user 56399 1 56399 0 S 0.0 0.0 410348128 6672 ??"
44+
+ " 3:25PM 0:00.22 501 501 0 /usr/lib exec/trustd");
45+
input.add("root 1 107 107 0 Ss 0.0 0.0 410142784 4016 ??"
46+
+ " 11Apr24 0:52.32 0 0 0 "
47+
+ "/System/Library/PrivateFrameworks/Uninstall.framework/Resources/uninstalld");
48+
input.add("user.name 1 1661 1661 0 S 0.7 0.2 412094800 75232 ?? "
49+
+ "11Apr24 325:33.63 775020228 775020228 0 "
50+
+ "/System/Library/CoreServices/ControlCenter app/Contents/MacOS/ControlCenter");
51+
52+
final List<List<Object>> output =
53+
ImmutableList.of(
54+
Arrays.asList("startup user", 56399, 1, 56399, 0, "S", 0, 0, "410348128", "6672", "??",
55+
"3:25PM", 220L, "501", "501", "0", "/usr/lib exec/trustd"),
56+
Arrays.asList("root", 1, 107, 107, 0, "Ss", 0, 0, "410142784", "4016", "??",
57+
"11Apr24", 52320L, "0", "0", "0",
58+
"/System/Library/PrivateFrameworks/Uninstall.framework/Resources/uninstalld"),
59+
Arrays.asList("user.name", 1, 1661, 1661, 0, "S", 7, 2, "412094800", "75232", "??",
60+
"11Apr24", 19533630L, "775020228", "775020228", "0",
61+
"/System/Library/CoreServices/ControlCenter app/Contents/MacOS/ControlCenter"));
62+
63+
final Map<String, List<Object>> testValues = new HashMap<>();
64+
for (int i = 0; i < input.size(); i++) {
65+
testValues.put(input.get(i), output.get(i));
66+
}
67+
68+
final PsTableFunction.LineParser psLineParser = new PsTableFunction.LineParser();
69+
for (Map.Entry<String, List<Object>> e : testValues.entrySet()) {
70+
assertThat(psLineParser.apply(e.getKey()), is(e.getValue().toArray()));
71+
}
72+
}
73+
}

0 commit comments

Comments
 (0)