@@ -123,15 +123,17 @@ func (bw *BitWriter) Flush() error {
123123 return err
124124}
125125
126- // Coding represents an encoding scheme like hex, base64, base32 etc.
127- // It allows for any custom character set, such as "HhAa" and "😱📣".
128- type Coding struct {
129- charset []rune
130- numOfBits uint8
// Coding is a streaming codec tied to a single character set. Values are
// obtained from NewCoding, which documents how the character set is used.
type Coding interface {
	// Encode consumes raw bytes from src and writes their encoded form to dst.
	Encode(dst io.Writer, src io.Reader) error
	// Decode consumes encoded text from src and writes the decoded bytes to dst.
	Decode(dst io.Writer, src io.Reader) error
}
132133
133- // NewCoding creates a new Coding with the given character set.
134- // The length of the character set must be a power of 2 not larger than 256 and must not contain duplicate runes.
134+ // TODO: Update README
135+
136+ // NewCoding creates a new coding with the given character set.
135137//
136138// For example,
137139//
@@ -142,7 +144,39 @@ type Coding struct {
142144// NewCoding([]rune(" ❗"))
143145//
144146// creates a binary encoding scheme: 0s are represented by a space and 1s are represented by an exclamation mark.
145- func NewCoding (charset []rune ) (* Coding , error ) {
147+ //
148+ // While a character set of any length can be used, those with power of 2 lengths (2, 4, 8, 16, 32, 64, 128, 256) use a
149+ // more optimized algorithm.
150+ //
151+ // Sets that are not power of 2 in length use an algorithm that may not have the same output as other encoders with the
152+ // same character set. For example, using the base58 character set does not mean that the output will be the same as a
153+ // base58-specific encoder.
154+ //
155+ // This is because most encoders interpret data as a number and use a base conversion algorithm to convert it to the
156+ // character set. For non-power-of-2 charsets, this requires all data to be read before encoding, which is not possible
157+ // with streams. To enable stream encoding for non-power-of-2 charsets, Aces converts 8 bytes of data at a time, which
158+ // is not the same as converting the base of the entire data.
159+ func NewCoding (charset []rune ) (Coding , error ) {
160+ seen := make (map [rune ]bool )
161+ for _ , r := range charset {
162+ if seen [r ] {
163+ return nil , errors .New ("charset contains duplicates: '" + string (r ) + "'" )
164+ }
165+ seen [r ] = true
166+ }
167+ if len (charset )& (len (charset )- 1 ) == 0 && len (charset ) < 256 { // is power of 2?
168+ return newTwoCoding (charset )
169+ }
170+ return newAnyCoding (charset )
171+ }
172+
// twoCoding is the Coding used when the character set length is a power of 2.
type twoCoding struct {
	// charset is the set of runes used as encoding symbols.
	charset []rune
	// numOfBits is log2(len(charset)); it is the chunk width handed to the
	// bit reader and bit writer.
	numOfBits uint8
}
178+
179+ func newTwoCoding (charset []rune ) (* twoCoding , error ) {
146180 numOfBits := uint8 (math .Log2 (float64 (len (charset ))))
147181 if 1 << numOfBits != len (charset ) {
148182 numOfBits = uint8 (math .Round (math .Log2 (float64 (len (charset )))))
@@ -151,18 +185,10 @@ func NewCoding(charset []rune) (*Coding, error) {
151185 "\n want: a power of 2 (nearest is" , 1 << numOfBits , "which is" , math .Abs (float64 (len (charset )- 1 << numOfBits )), "away)" ),
152186 )
153187 }
154- seen := make (map [rune ]bool )
155- for _ , r := range charset {
156- if seen [r ] {
157- return nil , errors .New ("charset contains duplicates" )
158- }
159- seen [r ] = true
160- }
161- return & Coding {charset : charset , numOfBits : numOfBits }, nil
188+ return & twoCoding {charset : charset , numOfBits : numOfBits }, nil
162189}
163190
164- // Encode encodes data from src and writes to dst.
165- func (c * Coding ) Encode (dst io.Writer , src io.Reader ) error {
191+ func (c * twoCoding ) Encode (dst io.Writer , src io.Reader ) error {
166192 bs , err := NewBitReader (c .numOfBits , src )
167193 if err != nil {
168194 panic (err )
@@ -192,8 +218,7 @@ func (c *Coding) Encode(dst io.Writer, src io.Reader) error {
192218 }
193219}
194220
195- // Decode decodes data from src and writes to dst.
196- func (c * Coding ) Decode (dst io.Writer , src io.Reader ) error {
221+ func (c * twoCoding ) Decode (dst io.Writer , src io.Reader ) error {
197222 bw := NewBitWriter (c .numOfBits , dst )
198223 bufStdin := bufio .NewReaderSize (src , 10 * 1024 )
199224 runeToByte := make (map [rune ]byte , len (c .charset ))
@@ -210,7 +235,10 @@ func (c *Coding) Decode(dst io.Writer, src io.Reader) error {
210235 }
211236 b , ok := runeToByte [r ]
212237 if ! ok {
213- continue
238+ if r == '\n' || r == '\r' {
239+ continue
240+ }
241+ return errors .New ("character " + string (r ) + "in input is not in the character set" )
214242 }
215243 err = bw .Write (b )
216244 if err != nil {
@@ -220,16 +248,17 @@ func (c *Coding) Decode(dst io.Writer, src io.Reader) error {
220248 return bw .Flush ()
221249}
222250
223- type ImpureCoding struct {
// anyCoding is the Coding for character sets of arbitrary length. It is slower
// than twoCoding.
type anyCoding struct {
	// charset is the set of runes used as encoding symbols.
	charset []rune
	// rPerOctet is the value of runesPerOctet(charset); encoded chunks are
	// left-padded to this many runes.
	rPerOctet int
}
227256
228- func NewImpureCoding (charset []rune ) (* ImpureCoding , error ) {
229- return & ImpureCoding {charset , runesPerOctet (charset )}, nil
257+ func newAnyCoding (charset []rune ) (* anyCoding , error ) {
258+ return & anyCoding {charset , runesPerOctet (charset )}, nil
230259}
231260
232- func (c * ImpureCoding ) Encode (dst io.Writer , src io.Reader ) error {
261+ func (c * anyCoding ) Encode (dst io.Writer , src io.Reader ) error {
233262 br := bufio .NewReaderSize (src , 10 * 1024 )
234263 result := make ([]rune , 0 , 10 * 1024 )
235264 buf := make ([]byte , 8 )
@@ -243,7 +272,6 @@ func (c *ImpureCoding) Encode(dst io.Writer, src io.Reader) error {
243272 }
244273
245274 result = append (result , encodeOctet (c .charset , buf , c .rPerOctet )... )
246- //result = append(result, ' ')
247275
248276 if len (result )+ 64 > cap (result ) {
249277 _ , err = dst .Write ([]byte (string (result )))
@@ -260,7 +288,6 @@ var resultBuf = make([]rune, 0, 64)
260288func encodeOctet (set []rune , octet []byte , rPerOctet int ) []rune {
261289 resultBuf = resultBuf [:0 ]
262290 i := bytesToInt (octet )
263- //println(i.String(), rPerOctet)
264291 resultBuf = toBase (i , resultBuf , set )
265292 for len (resultBuf ) < rPerOctet {
265292 // prepend with minimum new allocations
@@ -279,9 +306,7 @@ func decodeToOctet(set []rune, runes []rune) ([]byte, error) {
279306 return num .FillBytes (make ([]byte , 8 )), nil
280307}
281308
282- // TODO. does not ignore non-charset runes in input. change the other encoding to also not tolerate those or change this one
283-
284- func (c * ImpureCoding ) Decode (dst io.Writer , src io.Reader ) error {
309+ func (c * anyCoding ) Decode (dst io.Writer , src io.Reader ) error {
285310 var err error
286311
287312 br := bufio .NewReaderSize (src , 10 * 1024 )
@@ -292,12 +317,12 @@ func (c *ImpureCoding) Decode(dst io.Writer, src io.Reader) error {
292317 for i := range buf {
293318 buf [i ], _ , err = br .ReadRune ()
294319 if err != nil {
295- if i == 0 && err == io .EOF {
320+ if err == io .EOF {
296321 _ , err = dst .Write (result )
297322 }
298323 return err
299324 }
300- if buf [i ] == '\n' {
325+ if buf [i ] == '\n' || buf [ i ] == '\r' {
301326 i -- // ignore newline, read rune again
302327 }
303328 }
@@ -354,7 +379,7 @@ func fromBase(enc []rune, set []rune) (*big.Int, error) {
354379 )
355380 idx := setMap [enc [i ]]
356381 if idx == - 1 {
357- return nil , errors .New ("could not decode " + string (enc ) + ": rune " + string ( enc [i ]) + " is not in charset " )
382+ return nil , errors .New ("character " + string (enc [i ]) + "in input is not in the character set " )
358383 }
359384 mult .Mul (mult , big .NewInt (idx )) // multiply "place value" with the digit at spot i
360385 result .Add (result , mult )
0 commit comments