| OLD | NEW |
| 1 # | 1 # |
| 2 # (re)generate unicode property and type databases | 2 # (re)generate unicode property and type databases |
| 3 # | 3 # |
| 4 # this script converts a unicode 3.2 database file to | 4 # this script converts a unicode 3.2 database file to |
| 5 # Modules/unicodedata_db.h, Modules/unicodename_db.h, | 5 # Modules/unicodedata_db.h, Modules/unicodename_db.h, |
| 6 # and Objects/unicodetype_db.h | 6 # and Objects/unicodetype_db.h |
| 7 # | 7 # |
| 8 # history: | 8 # history: |
| 9 # 2000-09-24 fl created (based on bits and pieces from unidb) | 9 # 2000-09-24 fl created (based on bits and pieces from unidb) |
| 10 # 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table | 10 # 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table |
| (...skipping 42 matching lines...) Show 10 above Show 10 below |
| 53 ALPHA_MASK = 0x01 | 53 ALPHA_MASK = 0x01 |
| 54 DECIMAL_MASK = 0x02 | 54 DECIMAL_MASK = 0x02 |
| 55 DIGIT_MASK = 0x04 | 55 DIGIT_MASK = 0x04 |
| 56 LOWER_MASK = 0x08 | 56 LOWER_MASK = 0x08 |
| 57 LINEBREAK_MASK = 0x10 | 57 LINEBREAK_MASK = 0x10 |
| 58 SPACE_MASK = 0x20 | 58 SPACE_MASK = 0x20 |
| 59 TITLE_MASK = 0x40 | 59 TITLE_MASK = 0x40 |
| 60 UPPER_MASK = 0x80 | 60 UPPER_MASK = 0x80 |
| 61 XID_START_MASK = 0x100 | 61 XID_START_MASK = 0x100 |
| 62 XID_CONTINUE_MASK = 0x200 | 62 XID_CONTINUE_MASK = 0x200 |
| 63 HEX_ESCAPE_MASK = 0x400 |
| 63 | 64 |
| 64 def maketables(trace=0): | 65 def maketables(trace=0): |
| 65 | 66 |
| 66 print("--- Reading", UNICODE_DATA % "", "...") | 67 print("--- Reading", UNICODE_DATA % "", "...") |
| 67 | 68 |
| 68 version = "" | 69 version = "" |
| 69 unicode = UnicodeData(UNICODE_DATA % version, | 70 unicode = UnicodeData(UNICODE_DATA % version, |
| 70 COMPOSITION_EXCLUSIONS % version, | 71 COMPOSITION_EXCLUSIONS % version, |
| 71 EASTASIAN_WIDTH % version, | 72 EASTASIAN_WIDTH % version, |
| 72 DERIVED_CORE_PROPERTIES % version) | 73 DERIVED_CORE_PROPERTIES % version) |
| (...skipping 292 matching lines...) Show 10 above Show 10 below |
| 365 if category == "Ll": | 366 if category == "Ll": |
| 366 flags |= LOWER_MASK | 367 flags |= LOWER_MASK |
| 367 if category == "Zl" or bidirectional == "B": | 368 if category == "Zl" or bidirectional == "B": |
| 368 flags |= LINEBREAK_MASK | 369 flags |= LINEBREAK_MASK |
| 369 if category == "Zs" or bidirectional in ("WS", "B", "S"): | 370 if category == "Zs" or bidirectional in ("WS", "B", "S"): |
| 370 flags |= SPACE_MASK | 371 flags |= SPACE_MASK |
| 371 if category == "Lt": | 372 if category == "Lt": |
| 372 flags |= TITLE_MASK | 373 flags |= TITLE_MASK |
| 373 if category == "Lu": | 374 if category == "Lu": |
| 374 flags |= UPPER_MASK | 375 flags |= UPPER_MASK |
| 376 if category[0] == "C": |
| 377 flags |= HEX_ESCAPE_MASK |
| 378 if category[0] == "Z" and char != " ": |
| 379 flags |= HEX_ESCAPE_MASK |
| 375 if "XID_Start" in properties: | 380 if "XID_Start" in properties: |
| 376 flags |= XID_START_MASK | 381 flags |= XID_START_MASK |
| 377 if "XID_Continue" in properties: | 382 if "XID_Continue" in properties: |
| 378 flags |= XID_CONTINUE_MASK | 383 flags |= XID_CONTINUE_MASK |
| 379 # use delta predictor for upper/lower/title | 384 # use delta predictor for upper/lower/title |
| 380 if record[12]: | 385 if record[12]: |
| 381 upper = int(record[12], 16) - char | 386 upper = int(record[12], 16) - char |
| 382 assert -32768 <= upper <= 32767 | 387 assert -32768 <= upper <= 32767 |
| 383 upper = upper & 0xffff | 388 upper = upper & 0xffff |
| 384 else: | 389 else: |
| (...skipping 600 matching lines...) Show 10 above Show 10 below |
| 985 dump(t1, t2, shift, bytes) | 990 dump(t1, t2, shift, bytes) |
| 986 if __debug__: | 991 if __debug__: |
| 987 # exhaustively verify that the decomposition is correct | 992 # exhaustively verify that the decomposition is correct |
| 988 mask = ~((~0) << shift) # i.e., low-bit mask of shift bits | 993 mask = ~((~0) << shift) # i.e., low-bit mask of shift bits |
| 989 for i in range(len(t)): | 994 for i in range(len(t)): |
| 990 assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)] | 995 assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)] |
| 991 return best | 996 return best |
| 992 | 997 |
| 993 if __name__ == "__main__": | 998 if __name__ == "__main__": |
| 994 maketables(1) | 999 maketables(1) |
| OLD | NEW |