Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 61 additions & 60 deletions src/Series.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "openPMD/Series.hpp"
#include "openPMD/version.hpp"

#include <cctype>
#include <exception>
#include <iomanip>
#include <iostream>
Expand Down Expand Up @@ -76,8 +77,11 @@ namespace
* bool is True if file could be of type f and matches the iterationEncoding. False otherwise.
* int is the amount of padding present in the iteration number %T. Is 0 if bool is False.
*/
std::function<Match(std::string const &)>
matcher(std::string const &prefix, int padding, std::string const &postfix, Format f);
std::function<Match(std::string const &)> matcher(
std::string const &prefix,
int padding,
std::string const &postfix,
Format f);
} // namespace [anonymous]

struct SeriesInterface::ParsedInput
Expand Down Expand Up @@ -488,6 +492,21 @@ void SeriesInterface::init(
series.m_filenamePostfix = input->filenamePostfix;
series.m_filenamePadding = input->filenamePadding;

if( series.m_iterationEncoding == IterationEncoding::fileBased &&
!series.m_filenamePrefix.empty() &&
std::isdigit( static_cast< unsigned char >(
*series.m_filenamePrefix.rbegin() ) ) )
{
std::cerr << R"END(
[Warning] In file-based iteration encoding, it is strongly recommended to avoid
digits as the last characters of the filename prefix.
For instance, a robust pattern is to prepend the expansion pattern
of the filename with an underscore '_'.
Example: 'data_%T.json' or 'simOutput_%06T.h5'
Given file pattern: ')END"
<< series.m_name << "'" << std::endl;
}

if(IOHandler()->m_frontendAccess == Access::READ_ONLY || IOHandler()->m_frontendAccess == Access::READ_WRITE )
{
/* Allow creation of values in Containers and setting of Attributes
Expand Down Expand Up @@ -1613,71 +1632,53 @@ namespace
}

std::function<Match(std::string const &)>
buildMatcher(std::string const &regexPattern) {
buildMatcher(std::string const &regexPattern, int padding) {
std::regex pattern(regexPattern);

return [pattern](std::string const &filename) -> Match {
return [pattern, padding](std::string const &filename) -> Match {
std::smatch regexMatches;
bool match = std::regex_match(filename, regexMatches, pattern);
int padding = match ? regexMatches[1].length() : 0;
return {match, padding, match ? std::stoull(regexMatches[1]) : 0};
};
int processedPadding = padding != 0
? padding
: ( match ? regexMatches[ 1 ].length() : 0 );
return {
match,
processedPadding,
match ? std::stoull( regexMatches[ 1 ] ) : 0 }; };
}

std::function<Match(std::string const &)>
matcher(std::string const &prefix, int padding, std::string const &postfix, Format f) {
switch (f) {
case Format::HDF5: {
std::string nameReg = "^" + prefix + "([[:digit:]]";
if (padding != 0)
nameReg += "{" + std::to_string(padding) + "}";
else
nameReg += "+";
nameReg += +")" + postfix + ".h5$";
return buildMatcher(nameReg);
}
case Format::ADIOS1:
case Format::ADIOS2: {
std::string nameReg = "^" + prefix + "([[:digit:]]";
if (padding != 0)
nameReg += "{" + std::to_string(padding) + "}";
else
nameReg += "+";
nameReg += +")" + postfix + ".bp$";
return buildMatcher(nameReg);
}
case Format::ADIOS2_SST:
{
std::string nameReg = "^" + prefix + "([[:digit:]]";
if( padding != 0 )
nameReg += "{" + std::to_string(padding) + "}";
else
nameReg += "+";
nameReg += + ")" + postfix + ".sst$";
return buildMatcher(nameReg);
}
case Format::ADIOS2_SSC:
{
std::string nameReg = "^" + prefix + "([[:digit:]]";
if( padding != 0 )
nameReg += "{" + std::to_string(padding) + "}";
else
nameReg += "+";
nameReg += + ")" + postfix + ".ssc$";
return buildMatcher(nameReg);
}
case Format::JSON: {
std::string nameReg = "^" + prefix + "([[:digit:]]";
if (padding != 0)
nameReg += "{" + std::to_string(padding) + "}";
else
nameReg += "+";
nameReg += +")" + postfix + ".json$";
return buildMatcher(nameReg);
}
default:
return [](std::string const &) -> Match { return {false, 0, 0}; };
/**
 * Build a functor that tests whether a filename belongs to a file-based
 * series and, via buildMatcher(), extracts the iteration number from it.
 *
 * The regular expression is assembled as
 *   "^" + prefix + <digit group> + postfix + suffix(f) + "$"
 * and handed to buildMatcher() together with `padding`.
 *
 * @param prefix  Text expected before the iteration number.
 * @param padding Requested digit count of the iteration number;
 *                0 means "any number of digits".
 * @param postfix Text expected between the iteration number and the
 *                filename extension.
 * @param f       Format whose filename extension is looked up via suffix().
 * @return A functor mapping a filename to a Match. If suffix(f) is empty,
 *         the functor reports {false, 0, 0} for every filename.
 *
 * NOTE(review): prefix, postfix and the extension are concatenated into the
 * regex verbatim. Any regex metacharacters they contain (presumably at least
 * the '.' of the extension) are interpreted as regex syntax rather than
 * literally -- confirm whether callers escape them.
 */
std::function<Match(std::string const &)> matcher(
std::string const &prefix,
int padding,
std::string const &postfix,
Format f)
{
std::string filenameSuffix = suffix( f );
if( filenameSuffix.empty() )
{
// No extension for this format: return a matcher that never matches.
return [](std::string const &) -> Match { return {false, 0, 0}; };
}

std::string nameReg = "^" + prefix;
if (padding != 0)
{
// The digit group has two parts, split at the question mark:
// - after the question mark: exactly `padding` digits, so the number
//   is at least `padding` digits long
// - before the question mark: optional extra leading digits, so the
//   number may be longer than that only if its first digit is not zero
// The outer pair of parentheses is for later extraction of the
// iteration number via std::stoull(regexMatches[1])
nameReg += "(([1-9][[:digit:]]*)?([[:digit:]]";
nameReg += "{" + std::to_string(padding) + "}))";
}
else
{
// No padding specified, any number of digits is ok.
nameReg += "([[:digit:]]";
nameReg += "+)";
}
// Anchor the end of the pattern so the whole filename has to match.
nameReg += postfix + filenameSuffix + "$";
return buildMatcher(nameReg, padding);
}
} // namespace [anonymous]
} // namespace openPMD
95 changes: 75 additions & 20 deletions test/SerialIOTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1477,7 +1477,7 @@ void fileBased_write_test(const std::string & backend)
auxiliary::remove_directory("../samples/subdir");

{
Series o = Series("../samples/subdir/serial_fileBased_write%08T." + backend, Access::CREATE);
Series o = Series("../samples/subdir/serial_fileBased_write%03T." + backend, Access::CREATE);

ParticleSpecies& e_1 = o.iterations[1].particles["e"];

Expand Down Expand Up @@ -1552,12 +1552,12 @@ void fileBased_write_test(const std::string & backend)
o.flush();
o.iterations[5].setTime(static_cast< double >(5));
}
REQUIRE((auxiliary::file_exists("../samples/subdir/serial_fileBased_write00000001." + backend)
|| auxiliary::directory_exists("../samples/subdir/serial_fileBased_write00000001." + backend)));
REQUIRE((auxiliary::file_exists("../samples/subdir/serial_fileBased_write00000002." + backend)
|| auxiliary::directory_exists("../samples/subdir/serial_fileBased_write00000002." + backend)));
REQUIRE((auxiliary::file_exists("../samples/subdir/serial_fileBased_write00000003." + backend)
|| auxiliary::directory_exists("../samples/subdir/serial_fileBased_write00000003." + backend)));
REQUIRE((auxiliary::file_exists("../samples/subdir/serial_fileBased_write001." + backend)
|| auxiliary::directory_exists("../samples/subdir/serial_fileBased_write001." + backend)));
REQUIRE((auxiliary::file_exists("../samples/subdir/serial_fileBased_write002." + backend)
|| auxiliary::directory_exists("../samples/subdir/serial_fileBased_write002." + backend)));
REQUIRE((auxiliary::file_exists("../samples/subdir/serial_fileBased_write003." + backend)
|| auxiliary::directory_exists("../samples/subdir/serial_fileBased_write003." + backend)));

{
Series o = Series("../samples/subdir/serial_fileBased_write%T." + backend, Access::READ_ONLY);
Expand All @@ -1570,12 +1570,12 @@ void fileBased_write_test(const std::string & backend)
REQUIRE(o.iterations.count(5) == 1);

#if openPMD_USE_INVASIVE_TESTS
REQUIRE(o.get().m_filenamePadding == 8);
REQUIRE(o.get().m_filenamePadding == 3);
#endif

REQUIRE(o.basePath() == "/data/%T/");
REQUIRE(o.iterationEncoding() == IterationEncoding::fileBased);
REQUIRE(o.iterationFormat() == "serial_fileBased_write%08T");
REQUIRE(o.iterationFormat() == "serial_fileBased_write%03T");
REQUIRE(o.openPMD() == "1.1.0");
REQUIRE(o.openPMDextension() == 1u);
REQUIRE(o.particlesPath() == "particles/");
Expand Down Expand Up @@ -1642,15 +1642,37 @@ void fileBased_write_test(const std::string & backend)
o.iterations[ 6 ]
.particles[ "e" ][ "position" ][ "x" ]
.makeConstant< double >( 1.0 );

// additional iteration with over-running iteration padding but similar content
// padding: 000
uint64_t const overlong_it = 123456;
o.iterations[ overlong_it ];
// write something to trigger opening of the file
o.iterations[ overlong_it ].particles[ "e" ][ "position" ][ "x" ].resetDataset(
{ Datatype::DOUBLE, { 12 } } );
o.iterations[ overlong_it ]
.particles[ "e" ][ "position" ][ "x" ]
.makeConstant< double >( 1.0 );

o.iterations[ overlong_it ].setTime(static_cast< double >(overlong_it));
REQUIRE(o.iterations.size() == 7);
}
REQUIRE((auxiliary::file_exists("../samples/subdir/serial_fileBased_write00000004." + backend)
|| auxiliary::directory_exists("../samples/subdir/serial_fileBased_write00000004." + backend)));
REQUIRE((auxiliary::file_exists("../samples/subdir/serial_fileBased_write004." + backend)
|| auxiliary::directory_exists("../samples/subdir/serial_fileBased_write004." + backend)));
REQUIRE((auxiliary::file_exists("../samples/subdir/serial_fileBased_write123456." + backend)
|| auxiliary::directory_exists("../samples/subdir/serial_fileBased_write123456." + backend)));

// additional iteration with different iteration padding but similar content
// additional iteration with shorter iteration padding but similar content
{
Series o = Series("../samples/subdir/serial_fileBased_write%01T." + backend, Access::READ_WRITE);

REQUIRE(o.iterations.empty());
REQUIRE(o.iterations.size() == 1);
/*
* 123456 has no padding, it's just a very long number.
* So even when opening the series with a padding of 1,
* that iteration will be opened.
*/
REQUIRE(o.iterations.count(123456) == 1);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was initially thinking here: should we read 123456 if our padding was specifically asked to be 00?

But if we skipped it, that would be a bit inconsistent with how we read unpadded numbers.
Thus, I think the logic change here in the test that you added is the proper way to handle this 👍

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we read 123456 if our padding was specifically asked to be 00?

Yep, it should, because that number is not padded ;)


auto& it = o.iterations[10];
ParticleSpecies& e = it.particles["e"];
Expand All @@ -1660,32 +1682,65 @@ void fileBased_write_test(const std::string & backend)
e["positionOffset"]["x"].makeConstant(1.23);

fileBased_add_EDpic(e, 42);
it.setTime(static_cast< double >(10));

REQUIRE(o.iterations.size() == 1);
REQUIRE(o.iterations.size() == 2);
}
REQUIRE((auxiliary::file_exists("../samples/subdir/serial_fileBased_write10." + backend)
|| auxiliary::directory_exists("../samples/subdir/serial_fileBased_write10." + backend)));

// read back with auto-detection and non-fixed padding
{
Series s = Series("../samples/subdir/serial_fileBased_write%T." + backend, Access::READ_ONLY);
REQUIRE(s.iterations.size() == 7);
REQUIRE(s.iterations.size() == 8);
REQUIRE(s.iterations.contains(4));
REQUIRE(s.iterations.contains(10));
REQUIRE(s.iterations.contains(123456));

REQUIRE(s.iterations[3].time< double >() == 3.0);
REQUIRE(s.iterations[4].time< double >() == 4.0);
REQUIRE(s.iterations[5].time< double >() == 5.0);
REQUIRE(s.iterations[10].time< double >() == 10.0);
REQUIRE(s.iterations[123456].time< double >() == double(123456));
}

// write with auto-detection and in-consistent padding
// write with auto-detection and inconsistent padding from step 10
{
REQUIRE_THROWS_WITH(Series("../samples/subdir/serial_fileBased_write%T." + backend, Access::READ_WRITE),
Catch::Equals("Cannot write to a series with inconsistent iteration padding. Please specify '%0<N>T' or open as read-only."));
}

// read back with auto-detection and fixed padding
// read back with fixed padding
{
Series s = Series("../samples/subdir/serial_fileBased_write%03T." + backend, Access::READ_ONLY);
REQUIRE(s.iterations.size() == 7);
REQUIRE(s.iterations.contains(4));
REQUIRE(!s.iterations.contains(10));
REQUIRE(s.iterations.contains(123456));

REQUIRE(s.iterations[3].time< double >() == 3.0);
REQUIRE(s.iterations[4].time< double >() == 4.0);
REQUIRE(s.iterations[5].time< double >() == 5.0);
}

// read back with auto-detection (allow relaxed/overflow padding)
{
Series s = Series("../samples/subdir/serial_fileBased_write%08T." + backend, Access::READ_ONLY);
REQUIRE(s.iterations.size() == 6);
Series s = Series("../samples/subdir/serial_fileBased_write%T." + backend, Access::READ_ONLY);
REQUIRE(s.iterations.size() == 8);
REQUIRE(s.iterations.contains(4));
REQUIRE(s.iterations.contains(10));
REQUIRE(s.iterations.contains(123456));

REQUIRE(s.iterations[3].time< double >() == 3.0);
REQUIRE(s.iterations[4].time< double >() == 4.0);
REQUIRE(s.iterations[5].time< double >() == 5.0);
REQUIRE(s.iterations[10].time< double >() == 10.0);
REQUIRE(s.iterations[123456].time< double >() ==
static_cast< double >(123456));
}

{
Series list{ "../samples/subdir/serial_fileBased_write%08T." + backend, Access::READ_ONLY };
Series list{ "../samples/subdir/serial_fileBased_write%03T." + backend, Access::READ_ONLY };
helper::listSeries( list );
}
}
Expand Down