Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion src/main/java/joinery/DataFrame.java
Original file line number Diff line number Diff line change
Expand Up @@ -2172,9 +2172,23 @@ public static final DataFrame<Object> readCsv(final InputStream input, final Str
* @param file the file to write
* @throws IOException if an error occurs writing the file
*/
// CS427 Issue link: https://github.com/cardillo/joinery/issues/51
public final void writeCsv(final String file)
throws IOException {
    // Delegate the whole job (opening the file and writing the csv) to
    // Serialization. Opening a FileOutputStream here as well would truncate
    // and write the same file a second time.
    Serialization.writeCsv(this, file);
}

/**
* Write the data from this data frame to the specified file as csv,
* optionally including the row names.
*
* <p>This is a thin wrapper that forwards to
* {@code Serialization.writeCsv(this, file, writeRowNames)}.</p>
*
* @param file the path of the file to write
* @param writeRowNames {@code true} to include the row names in the output,
*                      {@code false} to write only the column data
* @throws IOException if an error occurs writing the file
*/
// CS427 Issue link: https://github.com/cardillo/joinery/issues/51
public final void writeCsv(final String file, final boolean writeRowNames)
throws IOException {
Serialization.writeCsv(this, file, writeRowNames);
}

/**
Expand All @@ -2184,11 +2198,25 @@ public final void writeCsv(final String file)
* @param output
* @throws IOException
*/
// CS427 Issue link: https://github.com/cardillo/joinery/issues/51
// Writes this data frame as csv to the given stream by forwarding to
// Serialization.writeCsv(this, output). That overload passes
// writeRowNames = false, so row names are not part of the output.
public final void writeCsv(final OutputStream output)
throws IOException {
Serialization.writeCsv(this, output);
}

/**
* Write the data from this data frame to the provided output stream as csv,
* optionally including the row names.
*
* <p>This is a thin wrapper that forwards to
* {@code Serialization.writeCsv(this, output, writeRowNames)}.</p>
*
* @param output the stream to write the csv data to
* @param writeRowNames whether to include row names
* @throws IOException if an error occurs writing to the stream
*/
// CS427 Issue link: https://github.com/cardillo/joinery/issues/51
public final void writeCsv(final OutputStream output, final boolean writeRowNames)
throws IOException {
Serialization.writeCsv(this, output, writeRowNames);
}

/**
* Read data from the specified excel
* workbook into a new data frame.
Expand Down
72 changes: 71 additions & 1 deletion src/main/java/joinery/impl/Serialization.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Collection;

import org.apache.poi.hssf.usermodel.HSSFDataFormat;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
Expand Down Expand Up @@ -290,14 +291,83 @@ public static DataFrame<Object> readCsv(final InputStream input, String separato
}
}

/**
* Write the data from data frame to the specified file as csv with no row names.
*
* <p>Opens a {@link FileOutputStream} on {@code output} and hands it to the
* stream-based {@code writeCsv(df, OutputStream)} overload.
* NOTE(review): the stream is not closed here; this relies on the stream
* overload closing it when its csv writer is closed -- confirm.</p>
*
* @param df data frame to write from
* @param output the path of the file to write to
* @throws IOException if an error occurs opening or writing the file
*/
public static <V> void writeCsv(final DataFrame<V> df, final String output)
throws IOException {
writeCsv(df, new FileOutputStream(output));
}

/**
* Write the data from data frame to the specified file as csv, optionally
* including the row names.
*
* <p>Opens a {@link FileOutputStream} on {@code output} and hands it to the
* stream-based {@code writeCsv(df, OutputStream, boolean)} overload.
* NOTE(review): the stream is not closed here; this relies on the stream
* overload closing it when its csv writer is closed -- confirm.</p>
*
* @param df data frame to write from
* @param output the path of the file to write to
* @param writeRowNames whether to include row names
* @throws IOException if an error occurs opening or writing the file
*/
// CS427 Issue link: https://github.com/cardillo/joinery/issues/51
public static <V> void writeCsv(final DataFrame<V> df, final String output, final boolean writeRowNames)
throws IOException {
writeCsv(df, new FileOutputStream(output), writeRowNames);
}

/**
* Write the data from data frame to the specified file as csv with no row names.
*
* @param output the file to write to
* @param df data frame to write from
* @throws IOException if an error occurs writing the file
*/
// CS427 Issue link: https://github.com/cardillo/joinery/issues/51
public static <V> void writeCsv(final DataFrame<V> df, final OutputStream output)
throws IOException {
throws IOException {
writeCsv(df, output, false);
}

/**
 * Build a data frame that carries the row names of {@code df} in a
 * leading column, followed by the values of {@code df}.
 *
 * <p>The row names (the index of {@code df}) are placed in a one-column
 * frame whose column name is the empty string. That frame is right-joined
 * with {@code df}, and every value of {@code df} is then copied into the
 * joined frame one column to the right of its original position.</p>
 *
 * @param df data frame to add to
 * @return data frame with the row names as the first column
 */
// CS427 Issue link: https://github.com/cardillo/joinery/issues/51
public static <V> DataFrame<V> addRowNames(final DataFrame<V> df) {
    // Single-column frame holding the row names under an empty header.
    final DataFrame<V> namesOnly = new DataFrame<>();
    final List<V> rowNames = new ArrayList<>((Collection<? extends V>) df.index());
    namesOnly.add("", rowNames);

    final DataFrame<V> result = df.join(namesOnly, DataFrame.JoinType.RIGHT);

    // Copy each original value into the joined frame, shifted right by one
    // column so that column 0 remains the row names.
    final int columnCount = df.size();
    final int rowCount = df.length();
    for (int col = 0; col < columnCount; col++) {
        for (int row = 0; row < rowCount; row++) {
            result.set(row, col + 1, df.get(row, col));
        }
    }
    return result;
}

/**
* Write the data from data frame to the specified file as csv.
*
* @param output the file to write to
* @param df data frame to write from
* @param writeRowNames whether to include row names
* @throws IOException if an error occurs writing the file
*/
// CS427 Issue link: https://github.com/cardillo/joinery/issues/51
public static <V> void writeCsv(final DataFrame<V> df, final OutputStream output, final boolean writeRowNames)
throws IOException {
try (CsvListWriter writer = new CsvListWriter(new OutputStreamWriter(output), CsvPreference.STANDARD_PREFERENCE)) {
if (writeRowNames) {
writeCsv(addRowNames(df), output, false);
return;
}
final String[] header = new String[df.size()];
final Iterator<Object> it = df.columns().iterator();
for (int c = 0; c < df.size(); c++) {
Expand Down
61 changes: 60 additions & 1 deletion src/test/java/joinery/DataFrameSerializationTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ public void testReadWriteCsvTypes()
);
}

@Test
@Test // CS427 Issue link: https://github.com/cardillo/joinery/issues/51
public void testWriteCsvNonStringIndex()
throws IOException {
final ByteArrayOutputStream out = new ByteArrayOutputStream();
Expand All @@ -186,6 +186,65 @@ public void testWriteCsvNonStringIndex()
assertTrue("writeCsv does not throw due to non-string indices", true);
}

/**
 * Test to write a csv with row names enabled.
 *
 * <p>Writes a frame with named rows to a temporary file with
 * {@code writeRowNames = true}, reads it back, and checks every column
 * (the row-name column plus each data column) against the reference
 * resource {@code serialization_row_names.csv}.</p>
 *
 * @throws IOException if an error occurs writing the file
 */
@Test // CS427 Issue link: https://github.com/cardillo/joinery/issues/51
public void testWriteCsvWithRowNames()
throws IOException {
    df = new DataFrame<>(
            Arrays.<Object>asList("row1", "row2", "row3", "row4", "row5", "row6"),
            Arrays.<Object>asList("category", "name", "value"),
            Arrays.asList(
                Arrays.<Object>asList("a", "a", "b", "b", "c", "c"),
                Arrays.<Object>asList("alpha", "bravo", "charlie", "delta", "echo", "foxtrot"),
                Arrays.<Object>asList(1, 2, 3, 4, 5, 6)
            )
        );
    final File tmp = File.createTempFile(getClass().getName(), ".csv");
    tmp.deleteOnExit();
    df.writeCsv(tmp.getPath(), true);

    // Read each file once instead of re-parsing both on every iteration.
    final DataFrame<Object> expected =
            DataFrame.readCsv(ClassLoader.getSystemResourceAsStream("serialization_row_names.csv"));
    final DataFrame<Object> actual = DataFrame.readCsv(tmp.getPath());

    // One extra column for the row names; compare column i, not always column 0.
    for (int i = 0; i < df.size() + 1; i++) {
        assertArrayEquals(
                "Checking if reading the csv has the row names",
                expected.col(i).toArray(),
                actual.col(i).toArray()
            );
    }
}


/**
 * Test to write a csv with row names disabled.
 *
 * <p>Writes a frame with named rows to a temporary file with
 * {@code writeRowNames = false}, reads it back, and checks every data
 * column against the reference resource {@code serialization.csv}.
 * NOTE(review): assumes {@code serialization.csv} holds the same three
 * columns as the frame built here -- confirm against the resource.</p>
 *
 * @throws IOException if an error occurs writing the file
 */
@Test // CS427 Issue link: https://github.com/cardillo/joinery/issues/51
public void testWriteCsvWithoutRowNames()
throws IOException {
    df = new DataFrame<>(
            Arrays.<Object>asList("row1", "row2", "row3", "row4", "row5", "row6"),
            Arrays.<Object>asList("category", "name", "value"),
            Arrays.asList(
                Arrays.<Object>asList("a", "a", "b", "b", "c", "c"),
                Arrays.<Object>asList("alpha", "bravo", "charlie", "delta", "echo", "foxtrot"),
                Arrays.<Object>asList(1, 2, 3, 4, 5, 6)
            )
        );
    final File tmp = File.createTempFile(getClass().getName(), ".csv");
    tmp.deleteOnExit();
    df.writeCsv(new FileOutputStream(tmp), false);

    // Read each file once instead of re-parsing both on every iteration.
    final DataFrame<Object> expected =
            DataFrame.readCsv(ClassLoader.getSystemResourceAsStream("serialization.csv"));
    final DataFrame<Object> actual = DataFrame.readCsv(tmp.getPath());

    // Compare column i on each pass; the original compared column 0 every time.
    for (int i = 0; i < df.size(); i++) {
        assertArrayEquals(
                "Checking if reading the csv does not have the row names",
                expected.col(i).toArray(),
                actual.col(i).toArray()
            );
    }
}

@Test(expected=FileNotFoundException.class)
public void testReadXlsString()
throws IOException {
Expand Down
7 changes: 7 additions & 0 deletions src/test/resources/serialization_row_names.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
,category,name,value
row1,a,alpha,1
row2,a,bravo,2
row3,b,charlie,3
row4,b,delta,4
row5,c,echo,5
row6,c,foxtrot,6