Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(1)

Side by Side Diff: Tools/unicode/makeunicodedata.py

Issue 767: [issue2630] repr() should not escape non-ASCII characters (Closed) SVN Base: http://svn.python.org/view/*checkout*/python/branches/py3k/
Patch Set: Created 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # 1 #
2 # (re)generate unicode property and type databases 2 # (re)generate unicode property and type databases
3 # 3 #
4 # this script converts a unicode 3.2 database file to 4 # this script converts a unicode 3.2 database file to
5 # Modules/unicodedata_db.h, Modules/unicodename_db.h, 5 # Modules/unicodedata_db.h, Modules/unicodename_db.h,
6 # and Objects/unicodetype_db.h 6 # and Objects/unicodetype_db.h
7 # 7 #
8 # history: 8 # history:
9 # 2000-09-24 fl created (based on bits and pieces from unidb) 9 # 2000-09-24 fl created (based on bits and pieces from unidb)
10 # 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table 10 # 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table
(...skipping 42 matching lines...) Show 10 above Show 10 below
53 ALPHA_MASK = 0x01 53 ALPHA_MASK = 0x01
54 DECIMAL_MASK = 0x02 54 DECIMAL_MASK = 0x02
55 DIGIT_MASK = 0x04 55 DIGIT_MASK = 0x04
56 LOWER_MASK = 0x08 56 LOWER_MASK = 0x08
57 LINEBREAK_MASK = 0x10 57 LINEBREAK_MASK = 0x10
58 SPACE_MASK = 0x20 58 SPACE_MASK = 0x20
59 TITLE_MASK = 0x40 59 TITLE_MASK = 0x40
60 UPPER_MASK = 0x80 60 UPPER_MASK = 0x80
61 XID_START_MASK = 0x100 61 XID_START_MASK = 0x100
62 XID_CONTINUE_MASK = 0x200 62 XID_CONTINUE_MASK = 0x200
63 HEX_ESCAPE_MASK = 0x400
63 64
64 def maketables(trace=0): 65 def maketables(trace=0):
65 66
66 print("--- Reading", UNICODE_DATA % "", "...") 67 print("--- Reading", UNICODE_DATA % "", "...")
67 68
68 version = "" 69 version = ""
69 unicode = UnicodeData(UNICODE_DATA % version, 70 unicode = UnicodeData(UNICODE_DATA % version,
70 COMPOSITION_EXCLUSIONS % version, 71 COMPOSITION_EXCLUSIONS % version,
71 EASTASIAN_WIDTH % version, 72 EASTASIAN_WIDTH % version,
72 DERIVED_CORE_PROPERTIES % version) 73 DERIVED_CORE_PROPERTIES % version)
(...skipping 292 matching lines...) Show 10 above Show 10 below
365 if category == "Ll": 366 if category == "Ll":
366 flags |= LOWER_MASK 367 flags |= LOWER_MASK
367 if category == "Zl" or bidirectional == "B": 368 if category == "Zl" or bidirectional == "B":
368 flags |= LINEBREAK_MASK 369 flags |= LINEBREAK_MASK
369 if category == "Zs" or bidirectional in ("WS", "B", "S"): 370 if category == "Zs" or bidirectional in ("WS", "B", "S"):
370 flags |= SPACE_MASK 371 flags |= SPACE_MASK
371 if category == "Lt": 372 if category == "Lt":
372 flags |= TITLE_MASK 373 flags |= TITLE_MASK
373 if category == "Lu": 374 if category == "Lu":
374 flags |= UPPER_MASK 375 flags |= UPPER_MASK
376 if category[0] == "C":
377 flags |= HEX_ESCAPE_MASK
378 if category[0] == "Z" and char != " ":
379 flags |= HEX_ESCAPE_MASK
375 if "XID_Start" in properties: 380 if "XID_Start" in properties:
376 flags |= XID_START_MASK 381 flags |= XID_START_MASK
377 if "XID_Continue" in properties: 382 if "XID_Continue" in properties:
378 flags |= XID_CONTINUE_MASK 383 flags |= XID_CONTINUE_MASK
379 # use delta predictor for upper/lower/title 384 # use delta predictor for upper/lower/title
380 if record[12]: 385 if record[12]:
381 upper = int(record[12], 16) - char 386 upper = int(record[12], 16) - char
382 assert -32768 <= upper <= 32767 387 assert -32768 <= upper <= 32767
383 upper = upper & 0xffff 388 upper = upper & 0xffff
384 else: 389 else:
(...skipping 600 matching lines...) Show 10 above Show 10 below
985 dump(t1, t2, shift, bytes) 990 dump(t1, t2, shift, bytes)
986 if __debug__: 991 if __debug__:
987 # exhaustively verify that the decomposition is correct 992 # exhaustively verify that the decomposition is correct
988 mask = ~((~0) << shift) # i.e., low-bit mask of shift bits 993 mask = ~((~0) << shift) # i.e., low-bit mask of shift bits
989 for i in range(len(t)): 994 for i in range(len(t)):
990 assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)] 995 assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
991 return best 996 return best
992 997
993 if __name__ == "__main__": 998 if __name__ == "__main__":
994 maketables(1) 999 maketables(1)
OLD | NEW

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld r338