Skip to content

Commit d2c8a3a

Browse files
committed
refactor, add code documentation
1 parent 2550427 commit d2c8a3a

File tree

2 files changed

+61
-63
lines changed

2 files changed

+61
-63
lines changed

aces.go

Lines changed: 58 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -123,15 +123,17 @@ func (bw *BitWriter) Flush() error {
123123
return err
124124
}
125125

126-
// Coding represents an encoding scheme like hex, base64, base32 etc.
127-
// It allows for any custom character set, such as "HhAa" and "😱📣".
128-
type Coding struct {
129-
charset []rune
130-
numOfBits uint8
126+
// Coding represents an encoding scheme for a character set. See NewCoding for more detail.
127+
type Coding interface {
128+
// Encode reads from src and encodes to dst
129+
Encode(dst io.Writer, src io.Reader) error
130+
// Decode reads from src and decodes to dst
131+
Decode(dst io.Writer, src io.Reader) error
131132
}
132133

133-
// NewCoding creates a new Coding with the given character set.
134-
// The length of the character set must be a power of 2 not larger than 256 and must not contain duplicate runes.
134+
// TODO: Update README
135+
136+
// NewCoding creates a new coding with the given character set.
135137
//
136138
// For example,
137139
//
@@ -142,7 +144,39 @@ type Coding struct {
142144
// NewCoding([]rune(" ❗"))
143145
//
144146
// creates a binary encoding scheme: 0s are represented by a space and 1s are represented by an exclamation mark.
145-
func NewCoding(charset []rune) (*Coding, error) {
147+
//
148+
// While a character set of any length can be used, those with power of 2 lengths (2, 4, 8, 16, 32, 64, 128, 256) use a
149+
// more optimized algorithm.
150+
//
151+
// Sets whose length is not a power of 2 use an algorithm that may not produce the same output as other encoders with the
152+
// same character set. For example, using the base58 character set does not mean that the output will be the same as a
153+
// base58-specific encoder.
154+
//
155+
// This is because most encoders interpret data as a number and use a base conversion algorithm to convert it to the
156+
// character set. For non-power-of-2 charsets, this requires all data to be read before encoding, which is not possible
157+
// with streams. To enable stream encoding for non-power-of-2 charsets, Aces converts 8 bytes of data at a time, which
158+
// is not the same as converting the base of the entire data.
159+
func NewCoding(charset []rune) (Coding, error) {
160+
seen := make(map[rune]bool)
161+
for _, r := range charset {
162+
if seen[r] {
163+
return nil, errors.New("charset contains duplicates: '" + string(r) + "'")
164+
}
165+
seen[r] = true
166+
}
167+
if len(charset)&(len(charset)-1) == 0 && len(charset) < 256 { // is power of 2?
168+
return newTwoCoding(charset)
169+
}
170+
return newAnyCoding(charset)
171+
}
172+
173+
// twoCoding is for character sets of a length that is a power of 2.
174+
type twoCoding struct {
175+
charset []rune
176+
numOfBits uint8
177+
}
178+
179+
func newTwoCoding(charset []rune) (*twoCoding, error) {
146180
numOfBits := uint8(math.Log2(float64(len(charset))))
147181
if 1<<numOfBits != len(charset) {
148182
numOfBits = uint8(math.Round(math.Log2(float64(len(charset)))))
@@ -151,18 +185,10 @@ func NewCoding(charset []rune) (*Coding, error) {
151185
"\n want: a power of 2 (nearest is", 1<<numOfBits, "which is", math.Abs(float64(len(charset)-1<<numOfBits)), "away)"),
152186
)
153187
}
154-
seen := make(map[rune]bool)
155-
for _, r := range charset {
156-
if seen[r] {
157-
return nil, errors.New("charset contains duplicates")
158-
}
159-
seen[r] = true
160-
}
161-
return &Coding{charset: charset, numOfBits: numOfBits}, nil
188+
return &twoCoding{charset: charset, numOfBits: numOfBits}, nil
162189
}
163190

164-
// Encode encodes data from src and writes to dst.
165-
func (c *Coding) Encode(dst io.Writer, src io.Reader) error {
191+
func (c *twoCoding) Encode(dst io.Writer, src io.Reader) error {
166192
bs, err := NewBitReader(c.numOfBits, src)
167193
if err != nil {
168194
panic(err)
@@ -192,8 +218,7 @@ func (c *Coding) Encode(dst io.Writer, src io.Reader) error {
192218
}
193219
}
194220

195-
// Decode decodes data from src and writes to dst.
196-
func (c *Coding) Decode(dst io.Writer, src io.Reader) error {
221+
func (c *twoCoding) Decode(dst io.Writer, src io.Reader) error {
197222
bw := NewBitWriter(c.numOfBits, dst)
198223
bufStdin := bufio.NewReaderSize(src, 10*1024)
199224
runeToByte := make(map[rune]byte, len(c.charset))
@@ -210,7 +235,10 @@ func (c *Coding) Decode(dst io.Writer, src io.Reader) error {
210235
}
211236
b, ok := runeToByte[r]
212237
if !ok {
213-
continue
238+
if r == '\n' || r == '\r' {
239+
continue
240+
}
241+
return errors.New("character " + string(r) + "in input is not in the character set")
214242
}
215243
err = bw.Write(b)
216244
if err != nil {
@@ -220,16 +248,17 @@ func (c *Coding) Decode(dst io.Writer, src io.Reader) error {
220248
return bw.Flush()
221249
}
222250

223-
type ImpureCoding struct {
251+
// anyCoding works with character sets of any length but is less performant than twoCoding.
252+
type anyCoding struct {
224253
charset []rune
225254
rPerOctet int
226255
}
227256

228-
func NewImpureCoding(charset []rune) (*ImpureCoding, error) {
229-
return &ImpureCoding{charset, runesPerOctet(charset)}, nil
257+
func newAnyCoding(charset []rune) (*anyCoding, error) {
258+
return &anyCoding{charset, runesPerOctet(charset)}, nil
230259
}
231260

232-
func (c *ImpureCoding) Encode(dst io.Writer, src io.Reader) error {
261+
func (c *anyCoding) Encode(dst io.Writer, src io.Reader) error {
233262
br := bufio.NewReaderSize(src, 10*1024)
234263
result := make([]rune, 0, 10*1024)
235264
buf := make([]byte, 8)
@@ -243,7 +272,6 @@ func (c *ImpureCoding) Encode(dst io.Writer, src io.Reader) error {
243272
}
244273

245274
result = append(result, encodeOctet(c.charset, buf, c.rPerOctet)...)
246-
//result = append(result, ' ')
247275

248276
if len(result)+64 > cap(result) {
249277
_, err = dst.Write([]byte(string(result)))
@@ -260,7 +288,6 @@ var resultBuf = make([]rune, 0, 64)
260288
func encodeOctet(set []rune, octet []byte, rPerOctet int) []rune {
261289
resultBuf = resultBuf[:0]
262290
i := bytesToInt(octet)
263-
//println(i.String(), rPerOctet)
264291
resultBuf = toBase(i, resultBuf, set)
265292
for len(resultBuf) < rPerOctet {
266293
// prepend with minimum new allocations
@@ -279,9 +306,7 @@ func decodeToOctet(set []rune, runes []rune) ([]byte, error) {
279306
return num.FillBytes(make([]byte, 8)), nil
280307
}
281308

282-
// TODO. does not ignore non-charset runes in input. change the other encoding to also not tolerate those or change this one
283-
284-
func (c *ImpureCoding) Decode(dst io.Writer, src io.Reader) error {
309+
func (c *anyCoding) Decode(dst io.Writer, src io.Reader) error {
285310
var err error
286311

287312
br := bufio.NewReaderSize(src, 10*1024)
@@ -292,12 +317,12 @@ func (c *ImpureCoding) Decode(dst io.Writer, src io.Reader) error {
292317
for i := range buf {
293318
buf[i], _, err = br.ReadRune()
294319
if err != nil {
295-
if i == 0 && err == io.EOF {
320+
if err == io.EOF {
296321
_, err = dst.Write(result)
297322
}
298323
return err
299324
}
300-
if buf[i] == '\n' {
325+
if buf[i] == '\n' || buf[i] == '\r' {
301326
i-- // ignore newline, read rune again
302327
}
303328
}
@@ -354,7 +379,7 @@ func fromBase(enc []rune, set []rune) (*big.Int, error) {
354379
)
355380
idx := setMap[enc[i]]
356381
if idx == -1 {
357-
return nil, errors.New("could not decode " + string(enc) + ": rune " + string(enc[i]) + " is not in charset")
382+
return nil, errors.New("character " + string(enc[i]) + "in input is not in the character set")
358383
}
359384
mult.Mul(mult, big.NewInt(idx)) // multiply "place value" with the digit at spot i
360385
result.Add(result, mult)

cmd/aces/main.go

Lines changed: 3 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ Usage:
1616
aces -d/--decode <charset> - decode data from STDIN from <charset>
1717
aces -h/--help - print this help message
1818
19-
Aces reads from STDIN for your data and outputs the result to STDOUT. The charset length must be
20-
a power of 2. While decoding, bytes not in the charset are ignored. Aces does not add any padding.
19+
Aces reads from STDIN for your data and outputs the result to STDOUT. An optimized algorithm is used
20+
for character sets with a power of 2 length. Newlines are ignored when decoding.
2121
2222
Examples:
2323
echo hello world | aces "<>(){}[]" | aces --decode "<>(){}[]" # basic usage
@@ -27,7 +27,7 @@ Examples:
2727
echo Calculus | aces 01 # what's stuff in binary?
2828
echo Aces™ | base64 | aces -d
2929
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/ # even decode base64
30-
30+
echo -n uwonsmth | aces 🥇🥈🥉 # emojis work too!
3131
Set the encoding/decoding buffer size with --bufsize <size> (default ` + strconv.Itoa(aces.BufSize) + ` bytes).
3232
3333
File issues, contribute or star at github.com/quackduck/aces`
@@ -36,14 +36,6 @@ File issues, contribute or star at github.com/quackduck/aces`
3636
func main() {
3737
var charset []rune
3838

39-
//i := aces.ImpureCoding{}
40-
//i.Charset = []rune("012")
41-
//err := i.Encode(os.Stdout, os.Stdin)
42-
//if err != nil {
43-
// fmt.Fprintln(os.Stderr, "error:", err)
44-
//}
45-
//return
46-
4739
if len(os.Args) == 1 {
4840
fmt.Fprintln(os.Stderr, "error: need at least one argument\n"+helpMsg)
4941
return
@@ -78,25 +70,6 @@ func main() {
7870
charset = []rune(os.Args[1])
7971
}
8072

81-
// check if charset length isn't a power of 2
82-
if len(charset)&(len(charset)-1) != 0 {
83-
c, err := aces.NewImpureCoding(charset)
84-
if err != nil {
85-
fmt.Fprintln(os.Stderr, "error:", err)
86-
return
87-
}
88-
if decode {
89-
err = c.Decode(os.Stdout, os.Stdin)
90-
} else {
91-
err = c.Encode(os.Stdout, os.Stdin)
92-
fmt.Println()
93-
}
94-
if err != nil {
95-
fmt.Fprintln(os.Stderr, "error:", err)
96-
}
97-
return
98-
}
99-
10073
c, err := aces.NewCoding(charset)
10174
if err != nil {
10275
fmt.Fprintln(os.Stderr, "error:", err)

0 commit comments

Comments
 (0)