public inbox for bitcoindev@googlegroups.com
 help / color / mirror / Atom feed
* [Bitcoin-development] RFC: c32d encoding
@ 2013-05-15  6:33 Luke-Jr
  0 siblings, 0 replies; only message in thread
From: Luke-Jr @ 2013-05-15  6:33 UTC (permalink / raw)
  To: bitcoin-development

[-- Attachment #1: Type: text/plain, Size: 1814 bytes --]

https://bitcointalk.org/?topic=205878

This encoding is designed so that it could replace Base58Check in new data, 
with the following goals in mind:
- Impossible(?) to manipulate without completely changing it
- Clearly identifiable prefix, regardless of data size
- Cheaper to process (simpler and faster code; it's a power-of-two radix)
- Fixed length string for fixed length data
- More unambiguous (removal of chars 'isuvzSVZ')
- Compatible with using seven-segment displays
- Altcoin friendly (16 bit namespace, which can be read without decoding)

Since there are fewer digits and more identifying/signature characters, 
addresses are longer. This should be less of a problem since ordinary users 
will hopefully be using addresses less common as the payment protocol becomes 
more popular.

Example Python code (including tests) is attached.
I can write up a formal BIP if this seems useful.

For example:

160 bits of data, such as current addresses:
    2nc111dhAPE2aUdYAOF88JhLn5jEjbULy4eFe9tyFYFE8
An ordinary P2SH destination, incorporating Greg's "require the hash mid-image 
to be relayed" concept (256 bits of data):
    2bc511A95e74P13dPb6b5t7yrh12EhC363ayH98n1cFbr3rAHdA49nCcC1G3P71j
The same key in Namecoin:
    2nc5119ttL35HPhc3Hh6aHe2tOhF6rdFtAOE1ahFLt9Ecabhcn5FLea5Le71P56C
The example "puzzle" script from the wiki (arbitrary scripting):
    2bc311d126acCyAnHAjabeUtOHcr7F811j4UYE6ECtOcbcGGn4O9chAt7O7y2LU9ty9cnG4
An alternative for BIP32 extended public keys (560 bits):
    2bc911AcchHheAGFnn9LC6FdF7bOc99APJtcEc46U655JheH6LCr3Y333eFEOtPJ9rj22rEcchHheAGFnn9LC6FdF7bOc99APJtcEc46U655JheH6LCr3YJCtPYea
An alternative for BIP32 extended private keys (552 bits):
    2bcb11O77GHdP53FH7Jh44OdEh3rLd4eFr2h7c8rGeErELG18yCy9O7L9LednyHJa5hyeAP77GHdP53FH7Jh44OdEh3rLd4eFr2h7c8rGeErELG18yCyGG5drPF1

[-- Attachment #2: c32.py --]
[-- Type: text/x-python, Size: 1676 bytes --]

# Digits are chosen to be the least ambiguous
# They all have unique seven-segment glyphs, and cannot be easily confused by humans
digits = '123456789abcdehjnrtyACEFGHJLOPUY'
radix = len(digits)

def encode(v):
	if not len(v):
		return ''
	n = 0
	bits = 0
	o = []
	pad = (len(v) * 8) % 5
	if pad:
		v = b'\0' + v
	v = bytearray(v)  # For Python 2.7 compatibility
	for i in range(len(v) - 1, -1, -1):
		n |= v[i] << bits
		bits += 8
		while bits >= 5:
			o.insert(0, digits[n & 0x1f])
			n >>= 5
			bits -= 5
			if i == 0 and pad:
				break
	return ''.join(o)

def decode(s):
	n = 0
	bits = 0
	o = bytearray()
	for i in range(len(s) - 1, -1, -1):
		n |= digits.index(s[i]) << bits
		bits += 5
		while bits >= 8:
			o.insert(0, n & 0xff)
			n >>= 8
			bits -= 8
	return bytes(o)

def test():
	from math import ceil
	assert '' == encode(b'')
	for (i, oc) in (
		(1, '8'),
		(2, '2'),
		(3, 'j'),
		(4, '4'),
		(5, 'Y'),
		(6, '8'),
		(7, '2'),
		(8, 'j'),
		(9, '4'),
	):
		ol = int(ceil(i * 8 / 5.))
		try:
			inzero = b'\0' * i
			inone = b'\xff' * i
			ezero = encode(inzero)
			eone = encode(inone)
			dzero = decode(ezero)
			done = decode(eone)
			
			assert ezero == '1' * ol
			assert eone == oc + ('Y' * (ol - 1))
			assert dzero == inzero
			assert done == inone
		except AssertionError:
			raise AssertionError('Input of length %s failed test' % (i,))
	try:
		for c in range(1024):
			decode('111' + chr(c))
	except ValueError:
		pass
	else:
		raise AssertionError('Invalid decode input (%02x) did not throw a ValueError' % (c,))

if __name__ == '__main__':
	test()
	print("Tests passed")

[-- Attachment #3: c32d.py --]
[-- Type: text/x-python, Size: 5168 bytes --]

import c32
import hashlib
import struct

def _checksum(v):
	return hashlib.sha256(hashlib.sha256(v).digest()).digest()[-4:]

'''
String format:
- c32(Raw format) in reverse order

Raw format:
- 4 bytes checksum
- N bytes data (NOTE: encoded to prevent hidden changes)
- - for script:
- - - N bytes: varint preimage data length
- - - N bytes: preimage data
- - - N bytes: script data
- - for BIP32 HD parent key:
- - - 32 bytes: chain code
- - - 33 bytes: parent pubkey
- - for BIP32 serialized key:
- - - 1 byte: depth
- - - 4 bytes: child number
- - - 32 bytes: chain code
- - - One of:
- - - - 32 bytes: private key data
- - - - 33 bytes: public key data
- 1 byte flag (ignored if unknown)
- 1 byte type
- - 01 script (with preimage data)
- - 02 script hash preimage
- - 03 BIP32 HD parent key
- - 04 BIP32 serialized public key
- 2 bytes namespace (blockchain id)
- - 2d41 Bitcoin  ('2bc')
- - 2e01 Namecoin ('2nc')
- - 2e37 Freicoin ('FRC')
'''

class c32d:
	__slots__ = ('data', 'ns', 'dtype', 'dflag')
	
	def __init__(self, data, ns, dtype, dflag):
		self.data = data
		self.ns = ns
		self.dtype = dtype
		self.dflag = dflag
	
	@classmethod
	def decode(cls, s, raw = False):
		if not raw:
			full = c32.decode(s[::-1])
		else:
			full = s
		
		csum = bytearray(full[:4])
		v = bytearray(full[4:])
		
		# Encode the configuration bytes to simplify decoding
		pv = 0xbf
		for i in range(len(v) - 1, len(v) - 5, -1):
			pv = v[i] ^ (csum[i % 4]) ^ pv
			v[i] = pv
		
		v.append(0xbf)
		for i in range(len(v) - 1):
			v[i] ^= csum[i % 4] ^ v[i + 1]
		v.pop()
		
		v = bytes(v)
		if csum != _checksum(v):
			raise ValueError('c32d checksum wrong')
		
		o = cls(None, None, None, None)
		o.data = v[:-4]
		o.dflag = v[-4]
		o.dtype = v[-3]
		o.ns = struct.unpack('!H', v[-2:])[0]
		
		return o
	
	def encode(self, raw = False):
		try:
			v = self.data + struct.pack('!BBH', self.dflag, self.dtype, self.ns)
		except struct.error as e:
			raise ValueError(e)
		csum = bytearray(_checksum(v))
		
		v = bytearray(v)
		pv = 0xbf
		for i in range(len(v) - 1, -1, -1):
			pv = v[i] ^ csum[i % 4] ^ pv
			if i < len(v) - 4:
				v[i] = pv
		v = csum + bytes(v)
		
		if raw:
			return v
		
		return c32.encode(v)[::-1]

decode = c32d.decode

def encode(*a, **ka):
	return c32d(*a, **ka).encode()

def test():
	c32.test()
	for (p, s, raw) in (
		((b'', 0, 0, 0), '1111115Fd9acc', b'\xb5\xa5\x0c\xb9\x00\x00\x00\x00'),
		((b'test', 4232, 142, 219), '955OGe8hOGc97hH4EJj1', b'?V\x1e\\d/\x1cq\xdb\x8e\x10\x88'),
		((b'\xff' * 0x100, 0xffff, 0xff, 0xff), 'YYYYYYc327OYcC6F9Or6r14UYCJtc5UGt9n2YYbeH63jda5GnYjCEO3r8E53dGYFbchrG4c327OYcC6F9Or6r14UYCJtc5UGt9n2YYbeH63jda5GnYjCEO3r8E53dGYFbchrG4c327OYcC6F9Or6r14UYCJtc5UGt9n2YYbeH63jda5GnYjCEO3r8E53dGYFbchrG4c327OYcC6F9Or6r14UYCJtc5UGt9n2YYbeH63jda5GnYjCEO3r8E53dGYFbchrG4c327OYcC6F9Or6r14UYCJtc5UGt9n2YYbeH63jda5GnYjCEO3r8E53dGYFbchrG4c327OYcC6F9Or6r14UYCJtc5UGt9n2YYbeH63jda5GnYjCEO3r8E53dGYFbchrG4c327OYcC6F9Or6r14UYCJtc5UGb2cOdG3', b'\xb0\xce,*\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xc7\x88\xb9j\xbf\xf0\xc1\x12\xff\xff\xff\xff'),
	):
		(kp, pp) = ({}, p)
		for i in range(2):
			o = c32d(*pp, **kp)
			assert o.data == p[0]
			assert o.ns == p[1]
			assert o.dtype == p[2]
			assert o.dflag == p[3]
			kp = {
				'data': p[0],
				'ns': p[1],
				'dtype': p[2],
				'dflag': p[3],
			}
			pp = ()
		assert o.encode() == s
		assert o.encode(raw=True) == raw
	
	def ensureValueError(f):
		try:
			f()
		except ValueError:
			pass
		else:
			raise AssertionError('Invalid decode input did not throw a ValueError')
	ensureValueError(lambda: encode(b'', -1, 0, 0))
	ensureValueError(lambda: encode(b'', 0x10000, 0, 0))
	ensureValueError(lambda: encode(b'', 0, -1, 0))
	ensureValueError(lambda: encode(b'', 0, 0x100, 0))
	ensureValueError(lambda: encode(b'', 0, 0, -1))
	ensureValueError(lambda: encode(b'', 0, 0, 0x100))
	
	# Invalid c32d
	ensureValueError(lambda: decode('1111115Fd9adc'))
	ensureValueError(lambda: decode('11A1115Fd9acc'))
	
	# Invalid c32
	ensureValueError(lambda: decode('111x115Fd9acc'))
	ensureValueError(lambda: decode('1111115Fd9acx'))

if __name__ == '__main__':
	test()
	print("Tests passed")

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2013-05-15  6:33 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-05-15  6:33 [Bitcoin-development] RFC: c32d encoding Luke-Jr

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox