|
36 | 36 |
|
37 | 37 | from pyspark.sql import SQLContext, HiveContext, Column |
38 | 38 | from pyspark.sql.types import IntegerType, Row, ArrayType, StructType, StructField, \ |
39 | | - UserDefinedType, DoubleType, LongType, StringType |
| 39 | + UserDefinedType, DoubleType, LongType, StringType, _infer_type |
40 | 40 | from pyspark.tests import ReusedPySparkTestCase |
41 | 41 |
|
42 | 42 |
|
@@ -210,6 +210,28 @@ def test_struct_in_map(self): |
210 | 210 | self.assertEqual(1, k.i) |
211 | 211 | self.assertEqual("", v.s) |
212 | 212 |
|
def test_infer_long_type(self):
    """Verify that large Python ints are inferred as LongType.

    Covers three things:
      * schema inference on an RDD whose Row holds an int too large for
        a 32-bit integer yields LongType for that field;
      * the inferred data survives a round trip through a Parquet file;
      * `_infer_type` switches from IntegerType to LongType at 2**31.
    """
    big_value = 100000000000000
    lrdd = self.sc.parallelize([Row(f1='a', f2=big_value)])
    slrdd = self.sqlCtx.inferSchema(lrdd)
    self.assertEqual(slrdd.schema().fields[1].dataType, LongType())

    # Saving as Parquet caused issues as well, so write the data out and
    # read it back to check the values are preserved.
    output_dir = os.path.join(self.tempdir.name, "infer_long_type")
    slrdd.saveAsParquetFile(output_dir)
    df1 = self.sqlCtx.parquetFile(output_dir)
    first_row = df1.first()  # fetch once instead of once per assertion
    self.assertEqual('a', first_row.f1)
    self.assertEqual(big_value, first_row.f2)

    # Values up to 2**31 - 1 are expected to be inferred as IntegerType.
    for small in (1, 2**10, 2**20, 2**31 - 1):
        self.assertEqual(_infer_type(small), IntegerType())

    # From 2**31 upwards the expected type is LongType; this test expects
    # that even for 2**71, which is wider than 64 bits.
    for large in (2**31, 2**61, 2**71):
        self.assertEqual(_infer_type(large), LongType())
213 | 235 | def test_convert_row_to_dict(self): |
214 | 236 | row = Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")}) |
215 | 237 | self.assertEqual(1, row.asDict()['l'][0].a) |
|
0 commit comments