Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,6 @@ matrix:
- MB_PYTHON_VERSION=2.7
- PLAT=i686
- UNICODE_WIDTH=16
- os: linux
env:
- MB_PYTHON_VERSION=3.3
- os: linux
env:
- MB_PYTHON_VERSION=3.3
- PLAT=i686
- os: linux
env:
- MB_PYTHON_VERSION=3.4
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@

## 11.0.0
- Upgrade to Unicode 11.0.0.
- Remove Python 3.3 support, as wheel no longer supports Python 3.3.

## 10.0.0-2
- Wheel for python 3.6

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ unicodedata2

unicodedata backport/updates to python 3 and python 2.

The versions of this package match unicode versions, so unicodedata2==9.0.0 is data from unicode 9.0.0.
The versions of this package match Unicode versions, so unicodedata2==11.0.0 contains data from Unicode 11.0.0.
Additionally, this backports support for named aliases and named sequences to Python 2.
20 changes: 3 additions & 17 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
environment:
global:
# SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
# /E:ON and /V:ON options are not enabled in the batch script interpreter
# See: http://stackoverflow.com/a/13751649/163740
CMD_IN_ENV: "cmd /E:ON /V:ON /C %APPVEYOR_BUILD_FOLDER%\\multibuild\\ci\\appveyor\\windows_sdk.cmd"

matrix:

- PYTHON: "C:\\Python27"
Expand All @@ -15,14 +9,6 @@ environment:
PYTHON_VERSION: "2.7.x"
PYTHON_ARCH: "64"

- PYTHON: "C:\\Python33"
PYTHON_VERSION: "3.3.x"
PYTHON_ARCH: "32"

- PYTHON: "C:\\Python33-x64"
PYTHON_VERSION: "3.3.x"
PYTHON_ARCH: "64"

- PYTHON: "C:\\Python34"
PYTHON_VERSION: "3.4.x"
PYTHON_ARCH: "32"
Expand Down Expand Up @@ -63,7 +49,7 @@ install:

# Upgrade to the latest version of pip to avoid it displaying warnings
# about it being out of date.
- "pip install --disable-pip-version-check --user --upgrade pip"
- "python -m pip install --disable-pip-version-check --user --upgrade pip"

# Upgrade setuptools, wheel and virtualenv
- "pip install --upgrade setuptools wheel virtualenv"
Expand All @@ -77,11 +63,11 @@ build: false # Not a C# project, build stuff at the test step instead.

test_script:
# Build the compiled extension and run the project tests
- "%CMD_IN_ENV% python setup.py test"
- python setup.py test

after_test:
# If tests are successful, create a whl package for the project.
- "%CMD_IN_ENV% python setup.py bdist_wheel"
- python setup.py bdist_wheel
- ps: "ls dist"

artifacts:
Expand Down
39 changes: 19 additions & 20 deletions makeunicodedata.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#
# (re)generate unicode property and type databases. Taken from python 3.4 source.
#
# this script converts a unicode 3.2 database file to
# Modules/unicodedata_db.h, Modules/unicodename_db.h,
# and Objects/unicodetype_db.h
# This script converts Unicode database files to Modules/unicodedata_db.h,
# Modules/unicodename_db.h, and Objects/unicodetype_db.h
#
# history:
# 2000-09-24 fl created (based on bits and pieces from unidb)
Expand Down Expand Up @@ -37,10 +36,10 @@
from textwrap import dedent

SCRIPT = sys.argv[0]
VERSION = "3.2"
VERSION = "3.3"

# The Unicode Database
UNIDATA_VERSION = "10.0.0"
UNIDATA_VERSION = "11.0.0"
UNICODE_DATA = "UnicodeData%s.txt"
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
Expand Down Expand Up @@ -97,7 +96,7 @@
# these ranges need to match unicodedata.c:is_unified_ideograph
cjk_ranges = [
('3400', '4DB5'),
('4E00', '9FEA'),
('4E00', '9FEF'),
('20000', '2A6D6'),
('2A700', '2B734'),
('2B740', '2B81D'),
Expand Down Expand Up @@ -274,8 +273,8 @@ def makeunicodedata(unicode, trace):
print("struct reindex{int start;short count,index;};", file=fp)
print("static struct reindex nfc_first[] = {", file=fp)
for start,end in comp_first_ranges:
print(" { %d, %d, %d}," % (start,end-start,comp_first[start]), file=fp)
print(" {0,0,0}", file=fp)
print(" { %d, %d, %d}," % (start,end-start,comp_first[start]), file=fp)
print(" {0,0,0}", file=fp)
print("};\n", file=fp)
print("static struct reindex nfc_last[] = {", file=fp)
for start,end in comp_last_ranges:
Expand Down Expand Up @@ -351,28 +350,28 @@ def makeunicodedata(unicode, trace):
index1, index2, shift = splitbins(index, trace)
print("static const change_record change_records_%s[] = {" % cversion, file=fp)
for record in records:
print("\t{ %s }," % ", ".join(map(str,record)), file=fp)
print(" { %s }," % ", ".join(map(str,record)), file=fp)
print("};", file=fp)
Array("changes_%s_index" % cversion, index1).dump(fp, trace)
Array("changes_%s_data" % cversion, index2).dump(fp, trace)
print("static const change_record* get_change_%s(Py_UCS4 n)" % cversion, file=fp)
print("{", file=fp)
print("\tint index;", file=fp)
print("\tif (n >= 0x110000) index = 0;", file=fp)
print("\telse {", file=fp)
print("\t\tindex = changes_%s_index[n>>%d];" % (cversion, shift), file=fp)
print("\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];" % \
print(" int index;", file=fp)
print(" if (n >= 0x110000) index = 0;", file=fp)
print(" else {", file=fp)
print(" index = changes_%s_index[n>>%d];" % (cversion, shift), file=fp)
print(" index = changes_%s_data[(index<<%d)+(n & %d)];" % \
(cversion, shift, ((1<<shift)-1)), file=fp)
print("\t}", file=fp)
print("\treturn change_records_%s+index;" % cversion, file=fp)
print(" }", file=fp)
print(" return change_records_%s+index;" % cversion, file=fp)
print("}\n", file=fp)
print("static Py_UCS4 normalization_%s(Py_UCS4 n)" % cversion, file=fp)
print("{", file=fp)
print("\tswitch(n) {", file=fp)
print(" switch(n) {", file=fp)
for k, v in normalization:
print("\tcase %s: return 0x%s;" % (hex(k), v), file=fp)
print("\tdefault: return 0;", file=fp)
print("\t}\n}\n", file=fp)
print(" case %s: return 0x%s;" % (hex(k), v), file=fp)
print(" default: return 0;", file=fp)
print(" }\n}\n", file=fp)

fp.close()

Expand Down
39 changes: 19 additions & 20 deletions makeunicodedata3.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#
# (re)generate unicode property and type databases
#
# this script converts a unicode 3.2 database file to
# Modules/unicodedata_db.h, Modules/unicodename_db.h,
# and Objects/unicodetype_db.h
# This script converts Unicode database files to Modules/unicodedata_db.h,
# Modules/unicodename_db.h, and Objects/unicodetype_db.h
#
# history:
# 2000-09-24 fl created (based on bits and pieces from unidb)
Expand Down Expand Up @@ -34,15 +33,15 @@
from textwrap import dedent

SCRIPT = sys.argv[0]
VERSION = "3.2"
VERSION = "3.3"

# The Unicode Database
# --------------------
# When changing UCD version please update
# * Doc/library/stdtypes.rst, and
# * Doc/library/unicodedata.rst
# * Doc/reference/lexical_analysis.rst (two occurrences)
UNIDATA_VERSION = "10.0.0"
UNIDATA_VERSION = "11.0.0"
UNICODE_DATA = "UnicodeData%s.txt"
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
Expand Down Expand Up @@ -99,7 +98,7 @@
# these ranges need to match unicodedata.c:is_unified_ideograph
cjk_ranges = [
('3400', '4DB5'),
('4E00', '9FEA'),
('4E00', '9FEF'),
('20000', '2A6D6'),
('2A700', '2B734'),
('2B740', '2B81D'),
Expand Down Expand Up @@ -276,8 +275,8 @@ def makeunicodedata(unicode, trace):
print("struct reindex{int start;short count,index;};", file=fp)
print("static struct reindex nfc_first[] = {", file=fp)
for start,end in comp_first_ranges:
print(" { %d, %d, %d}," % (start,end-start,comp_first[start]), file=fp)
print(" {0,0,0}", file=fp)
print(" { %d, %d, %d}," % (start,end-start,comp_first[start]), file=fp)
print(" {0,0,0}", file=fp)
print("};\n", file=fp)
print("static struct reindex nfc_last[] = {", file=fp)
for start,end in comp_last_ranges:
Expand Down Expand Up @@ -353,28 +352,28 @@ def makeunicodedata(unicode, trace):
index1, index2, shift = splitbins(index, trace)
print("static const change_record change_records_%s[] = {" % cversion, file=fp)
for record in records:
print("\t{ %s }," % ", ".join(map(str,record)), file=fp)
print(" { %s }," % ", ".join(map(str,record)), file=fp)
print("};", file=fp)
Array("changes_%s_index" % cversion, index1).dump(fp, trace)
Array("changes_%s_data" % cversion, index2).dump(fp, trace)
print("static const change_record* get_change_%s(Py_UCS4 n)" % cversion, file=fp)
print("{", file=fp)
print("\tint index;", file=fp)
print("\tif (n >= 0x110000) index = 0;", file=fp)
print("\telse {", file=fp)
print("\t\tindex = changes_%s_index[n>>%d];" % (cversion, shift), file=fp)
print("\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];" % \
print(" int index;", file=fp)
print(" if (n >= 0x110000) index = 0;", file=fp)
print(" else {", file=fp)
print(" index = changes_%s_index[n>>%d];" % (cversion, shift), file=fp)
print(" index = changes_%s_data[(index<<%d)+(n & %d)];" % \
(cversion, shift, ((1<<shift)-1)), file=fp)
print("\t}", file=fp)
print("\treturn change_records_%s+index;" % cversion, file=fp)
print(" }", file=fp)
print(" return change_records_%s+index;" % cversion, file=fp)
print("}\n", file=fp)
print("static Py_UCS4 normalization_%s(Py_UCS4 n)" % cversion, file=fp)
print("{", file=fp)
print("\tswitch(n) {", file=fp)
print(" switch(n) {", file=fp)
for k, v in normalization:
print("\tcase %s: return 0x%s;" % (hex(k), v), file=fp)
print("\tdefault: return 0;", file=fp)
print("\t}\n}\n", file=fp)
print(" case %s: return 0x%s;" % (hex(k), v), file=fp)
print(" default: return 0;", file=fp)
print(" }\n}\n", file=fp)

fp.close()

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)

setup (name = "unicodedata2",
version = "10.0.0-2",
version = "11.0.0",
description = "Unicodedata backport for python 2/3 updated to the latest unicode version.",
ext_modules = [module1],
author="Mike Kaplinskiy",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_unicodedata2.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):

# Update this if the database changes. Make sure to do a full rebuild
# (e.g. 'make distclean && make') to get the correct checksum.
expectedchecksum = 'db6f92bb5010f8e85000634b08e77233355ab37a'
expectedchecksum = '4f73278b19c2ec3099724c132f0b90a1d25c19e4'
def test_function_checksum(self):
data = []
h = hashlib.sha1()
Expand Down
2 changes: 1 addition & 1 deletion unicodedata2/py2/unicodedata.c
Original file line number Diff line number Diff line change
Expand Up @@ -882,7 +882,7 @@ is_unified_ideograph(Py_UCS4 code)
{
return
(0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
(0x4E00 <= code && code <= 0x9FEA) || /* CJK Ideograph */
(0x4E00 <= code && code <= 0x9FEF) || /* CJK Ideograph */
(0x20000 <= code && code <= 0x2A6D6) || /* CJK Ideograph Extension B */
(0x2A700 <= code && code <= 0x2B734) || /* CJK Ideograph Extension C */
(0x2B740 <= code && code <= 0x2B81D) || /* CJK Ideograph Extension D */
Expand Down
Loading