The following 50 code examples, extracted from open-source Python projects, illustrate how to use builtins.chr().
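Before the project code, here is a minimal standalone sketch (not taken from any of the projects below) of what builtins.chr() does: it converts an integer code point in the range 0 through 0x10FFFF into the corresponding one-character string, and ord() is its inverse.

# Minimal sketch of builtins.chr(): integer code point -> one-character string.
print(chr(65))        # 'A'
print(chr(0x1F600))   # a one-character string for any code point up to 0x10FFFF
print(ord(chr(1)))    # 1, because ord() is the inverse of chr()

try:
    chr(0x110000)     # out of range
except ValueError as exc:
    print(exc)        # in CPython: "chr() arg not in range(0x110000)"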
def __init__(self, sql, hive_table, create=True, recreate=False,
             partition=None, delimiter=chr(1),
             vertica_conn_id='vertica_default',
             hive_cli_conn_id='hive_cli_default',
             *args, **kwargs):
    super(VerticaToHiveTransfer, self).__init__(*args, **kwargs)
    self.sql = sql
    self.hive_table = hive_table
    self.partition = partition
    self.create = create
    self.recreate = recreate
    self.delimiter = str(delimiter)
    self.vertica_conn_id = vertica_conn_id
    self.hive_cli_conn_id = hive_cli_conn_id
    self.partition = partition or {}
def __init__(self, sql, hive_table, create=True, recreate=False,
             partition=None, delimiter=chr(1),
             mssql_conn_id='mssql_default',
             hive_cli_conn_id='hive_cli_default',
             tblproperties=None,
             *args, **kwargs):
    super(MsSqlToHiveTransfer, self).__init__(*args, **kwargs)
    self.sql = sql
    self.hive_table = hive_table
    self.partition = partition
    self.create = create
    self.recreate = recreate
    self.delimiter = delimiter
    self.mssql_conn_id = mssql_conn_id
    self.hive_cli_conn_id = hive_cli_conn_id
    self.partition = partition or {}
    self.tblproperties = tblproperties
def __init__(self, sql, hive_table, create=True, recreate=False,
             partition=None, delimiter=chr(1),
             mysql_conn_id='mysql_default',
             hive_cli_conn_id='hive_cli_default',
             tblproperties=None,
             *args, **kwargs):
    super(MySqlToHiveTransfer, self).__init__(*args, **kwargs)
    self.sql = sql
    self.hive_table = hive_table
    self.partition = partition
    self.create = create
    self.recreate = recreate
    self.delimiter = str(delimiter)
    self.mysql_conn_id = mysql_conn_id
    self.hive_cli_conn_id = hive_cli_conn_id
    self.partition = partition or {}
    self.tblproperties = tblproperties
def truelen(string):
    """
    It appears one Asian character takes two spots, but __len__ counts it
    as three, so this function counts the displayed length of the string.

    >>> truelen('abc')
    3
    >>> truelen('??')
    4
    >>> truelen('1?3')
    4
    >>> truelen('')
    0
    """
    # Floor division keeps the result an int; plain / would return a float
    # on Python 3 and break the doctests above.
    return len(string) - sum(1 for c in string if c > chr(127)) // 3
def get_string_u_at_rva(self, rva, max_length=2**16):
    """Get an Unicode string located at the given address."""

    try:
        # If the RVA is invalid all would blow up. Some EXEs seem to be
        # specially nasty and have an invalid RVA.
        data = self.get_data(rva, 2)
    except PEFormatError as e:
        return None

    s = u''
    for idx in range(max_length):
        data = self.get_data(rva + 2*idx, 2)
        try:
            uchr = struct.unpack(b'<H', data)[0]
        except struct.error:
            break
        if uchr == 0:
            break
        s += chr(uchr)

    return s.encode('ascii', 'backslashreplace')
def __init__(self, sql, hive_table, create=True, recreate=False,
             partition=None, delimiter=chr(1),
             mssql_conn_id='mssql_default',
             hive_cli_conn_id='hive_cli_default',
             *args, **kwargs):
    super(MsSqlToHiveTransfer, self).__init__(*args, **kwargs)
    self.sql = sql
    self.hive_table = hive_table
    self.partition = partition
    self.create = create
    self.recreate = recreate
    self.delimiter = delimiter
    self.mssql_conn_id = mssql_conn_id
    self.hive_cli_conn_id = hive_cli_conn_id
    self.partition = partition or {}
def __init__(self, sql, hive_table, create=True, recreate=False,
             partition=None, delimiter=chr(1),
             mysql_conn_id='mysql_default',
             hive_cli_conn_id='hive_cli_default',
             *args, **kwargs):
    super(MySqlToHiveTransfer, self).__init__(*args, **kwargs)
    self.sql = sql
    self.hive_table = hive_table
    self.partition = partition
    self.create = create
    self.recreate = recreate
    self.delimiter = str(delimiter)
    self.mysql_conn_id = mysql_conn_id
    self.hive_cli_conn_id = hive_cli_conn_id
    self.partition = partition or {}
def argument_names(args):
    """Give arguments alpha-numeric names.

    >>> names = argument_names(range(100))
    >>> [names[i] for i in range(0,100,26)]
    [u'?a', u'?a1', u'?a2', u'?a3']
    >>> [names[i] for i in range(1,100,26)]
    [u'?b', u'?b1', u'?b2', u'?b3']
    """
    # Argument naming scheme: integer -> `?[a-z]`, with a number appended
    # if there are more than 26 arguments.
    name = {}
    for i, arg in enumerate(args):
        c = i // 26 if i >= 26 else ''
        name[arg] = '?%s%s' % (chr(97 + (i % 26)), c)
    return name
def as_unicode(self) -> str:
    """
    Builds an unicode string from integers in the list, assuming that not
    all of them are bytes and some of them can be large Unicode codepoints.
    """
    return "".join([chr(x) for x in self.elements_])
def escapecp(cp):
    return '&%s;' % codepoint2name[cp] if (cp in codepoint2name) else chr(cp)
def parse(cls, fp):
    """
    >>> from io import BytesIO
    >>> fp = BytesIO(b'\\x01\\x00\\x00\\x02')
    >>> str(QNameDictionaryTextRecord.parse(fp))
    'b:Envelope'
    """
    prefix = chr(struct.unpack(b'<B', fp.read(1))[0] + ord('a'))
    idx = struct.unpack(b'<BBB', fp.read(3))
    index = idx[0] << 16 | idx[1] << 8 | idx[2]
    return cls(prefix, index)
def unescape(self, s):
    if '&' not in s:
        return s

    def replaceEntities(s):
        s = s.groups()[0]
        try:
            if s[0] == "#":
                s = s[1:]
                if s[0] in ['x', 'X']:
                    c = int(s[1:], 16)
                else:
                    c = int(s)
                return chr(c)
        except ValueError:
            return '&#' + s + ';'
        else:
            # Cannot use name2codepoint directly, because HTMLParser supports apos,
            # which is not part of HTML 4
            import htmlentitydefs
            if HTMLParser.entitydefs is None:
                entitydefs = HTMLParser.entitydefs = {'apos': u"'"}
                for k, v in htmlentitydefs.name2codepoint.iteritems():
                    entitydefs[k] = chr(v)
            try:
                return self.entitydefs[s]
            except KeyError:
                return '&' + s + ';'

    return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));", replaceEntities, s)
def handle_charref(self, name):
    if name[0] == 'x':
        self.handle_data(chr(int(name[1:], 16)))
    else:
        self.handle_data(chr(int(name, 10)))
def aesEncrypt(text, secKey):
    pad = 16 - len(text) % 16
    text = text + chr(pad) * pad
    encryptor = AES.new(secKey, 2, '0102030405060708')
    ciphertext = encryptor.encrypt(text)
    ciphertext = base64.b64encode(ciphertext).decode('utf-8')
    return ciphertext
def update(self):
    self.display = self.content
    curTime = time() // 1
    offset = max(int((curTime - self.START) % len(self.content)) - 1, 0)
    while offset > 0:
        if self.display[0] > chr(127):
            offset -= 1
            self.display = self.display[3:] + self.display[:3]
        else:
            offset -= 1
            self.display = self.display[1:] + self.display[:1]
    # self.display = self.content[offset:] + self.content[:offset]
def insertNewline_(self, sender):
    # Distinguish between carriage return (u'\r') and enter (u'\x03').
    # Only the return key press gets sent as an on_text event.
    event = NSApplication.sharedApplication().currentEvent()
    chars = event.charactersIgnoringModifiers()
    ch = chr(chars.characterAtIndex_(0))
    if ch == u'\r':
        self._window.dispatch_event("on_text", u'\r')
def _event_char(self, msg, wParam, lParam):
    text = chr(wParam)
    if unicodedata.category(text) != 'Cc' or text == '\r':
        self.dispatch_event('on_text', text)
    return 0
def _get_character_map_format4(self, offset):
    # This is absolutely, without question, the *worst* file
    # format ever. Whoever the fuckwit is that thought this up is
    # a fuckwit.
    header = _read_cmap_format4Header(self._data, offset)
    seg_count = header.seg_count_x2 // 2
    array_size = struct.calcsize('>%dH' % seg_count)
    end_count = self._read_array('>%dH' % seg_count,
                                 offset + header.size)
    start_count = self._read_array('>%dH' % seg_count,
                                   offset + header.size + array_size + 2)
    id_delta = self._read_array('>%dh' % seg_count,
                                offset + header.size + array_size + 2 + array_size)
    id_range_offset_address = \
        offset + header.size + array_size + 2 + array_size + array_size
    id_range_offset = self._read_array('>%dH' % seg_count,
                                       id_range_offset_address)
    character_map = {}
    for i in range(0, seg_count):
        if id_range_offset[i] != 0:
            if id_range_offset[i] == 65535:
                continue  # Hack around a dodgy font (babelfish.ttf)
            for c in range(start_count[i], end_count[i] + 1):
                addr = id_range_offset[i] + 2*(c - start_count[i]) + \
                    id_range_offset_address + 2*i
                g = struct.unpack('>H', self._data[addr:addr+2])[0]
                if g != 0:
                    character_map[chr(c)] = (g + id_delta[i]) % 65536
        else:
            for c in range(start_count[i], end_count[i] + 1):
                g = (c + id_delta[i]) % 65536
                if g != 0:
                    character_map[chr(c)] = g
    return character_map
def handle_entityref(self, name):
    if name in entities.name2codepoint:
        self.handle_data(chr(entities.name2codepoint[name]))
def Str255(value):
    return create_string_buffer(chr(len(value)) + value)
def aesEncrypt(text, secKey):
    pad = 16 - len(text) % 16
    text = text + chr(pad) * pad
    encryptor = AES.new(secKey, 2, '0102030405060708')
    ciphertext = encryptor.encrypt(text)
    ciphertext = base64.b64encode(ciphertext).decode('u8')
    return ciphertext
def _read_string(self):
    self.__text.mark()
    try:
        src, value = [self._read_literal('"')], []
        while True:
            chunk = self.__text.read_until_any(('"', '\\'))
            src.append(chunk)
            value.append(chunk[:-1])
            if chunk.endswith('\\'):
                ch = self.__text.read()
                src.append(ch)
                if ch in '"/\\':
                    value.append(ch)
                elif ch == 'b':
                    value.append('\b')
                elif ch == 'f':
                    value.append('\f')
                elif ch == 'n':
                    value.append('\n')
                elif ch == 'r':
                    value.append('\r')
                elif ch == 't':
                    value.append('\t')
                elif ch == 'u':
                    n = 0
                    for i in range(4):
                        ch = self.__text.read()
                        src.append(ch)
                        n = 16 * n + int(ch, 16)
                    value.append(_chr(n))
                else:
                    raise UnexpectedCharacter(ch)
            else:
                return "".join(src), "".join(value)
    except AwaitingData:
        self.__text.undo()
        raise
def utf8_to_string(buff, length):
    chars = []
    for _ in range(length):
        first_char = get_byte(buff)
        value = first_char >> 4
        if value in (0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07):
            if first_char == 0:
                warning('at offset %x: single zero byte illegal' % buff.get_idx())
            chars.append(chr(first_char))
        elif value in (0x0c, 0x0d):
            second_char = get_byte(buff)
            if (second_char & 0xc0) != 0x80:
                warning('bad utf8 at offset: %x' % buff.get_idx())
            value = ((first_char & 0x1f) << 6) | (second_char & 0x3f)
            if value != 0 and value < 0x80:
                warning(
                    'at offset %x: utf8 should have been represented with one byte encoding'
                    % buff.get_idx())
            chars.append(chr(value))
        elif value == 0x0e:
            second_char = get_byte(buff)
            if second_char & 0xc0 != 0x80:
                warning('bad utf8 byte %x at offset %x' %
                        (second_char, buff.get_idx()))
            third_char = get_byte(buff)
            if third_char & 0xc0 != 0x80:
                warning('bad utf8 byte %x at offset %x' %
                        (third_char, buff.get_idx()))
            value = ((first_char & 0x0f) << 12) | (
                (second_char & 0x3f) << 6) | (third_char & 0x3f)
            if value < 0x800:
                warning(
                    'at offset %x: utf8 should have been represented with two-byte encoding'
                    % buff.get_idx())
            chars.append(chr(value))
        else:
            warning('at offset %x: illegal utf8' % buff.get_idx())
    # FIXME correct handling of utf8?
    return ''.join(chars)
def get_language(self):
    x = self.locale & 0x0000ffff
    return chr(x & 0x00ff) + chr((x & 0xff00) >> 8)
def get_country(self):
    x = (self.locale & 0xffff0000) >> 16
    return chr(x & 0x00ff) + chr((x & 0xff00) >> 8)
def __init__(self, filename="NoName", date_time=(1980, 1, 1, 0, 0, 0)):
    self.orig_filename = filename   # Original file name in archive

    # Terminate the file name at the first null byte. Null bytes in file
    # names are used as tricks by viruses in archives.
    null_byte = filename.find(chr(0))
    if null_byte >= 0:
        filename = filename[0:null_byte]
    # This is used to ensure paths in generated ZIP files always use
    # forward slashes as the directory separator, as required by the
    # ZIP format specification.
    if os.sep != "/" and os.sep in filename:
        filename = filename.replace(os.sep, "/")

    self.filename = filename        # Normalized file name
    self.date_time = date_time      # year, month, day, hour, min, sec
    # Standard values:
    self.compress_type = ZIP_STORED # Type of compression for the file
    self.comment = ""               # Comment for each file
    self.extra = ""                 # ZIP extra data
    if sys.platform == 'win32':
        self.create_system = 0      # System which created ZIP archive
    else:
        # Assume everything else is unix-y
        self.create_system = 3      # System which created ZIP archive
    self.create_version = 20        # Version which created ZIP archive
    self.extract_version = 20       # Version needed to extract archive
    self.reserved = 0               # Must be zero
    self.flag_bits = 0              # ZIP flag bits
    self.volume = 0                 # Volume number of file header
    self.internal_attr = 0          # Internal attributes
    self.external_attr = 0          # External file attributes
    # Other attributes are set by class ZipFile(object):
    # header_offset     Byte offset to the file header
    # CRC               CRC-32 of the uncompressed file
    # compress_size     Size of the compressed file
    # file_size         Size of the uncompressed file
def _UpdateKeys(self, c):
    self.key0 = self._crc32(c, self.key0)
    self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
    self.key1 = (self.key1 * 134775813 + 1) & 4294967295
    self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
def python_unichr(chr_code):
    """
    compatible function for chr(python 3.x) and unichr(python 2.x)
    """
    try:
        from builtins import chr
    except ImportError:
        return unichr(chr_code)  # python 2.x
    else:
        return chr(chr_code)     # python 3.x
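A brief usage note (the calls below are illustrative, not part of the original project): the wrapper behaves like chr() on Python 3 and like unichr() on Python 2.

# Illustrative calls; on Python 3, python_unichr() simply delegates to chr().
assert python_unichr(97) == 'a'
assert python_unichr(0x4e2d) == '\u4e2d'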
def dump(self, indentation=0):
    """Returns a string representation of the structure."""

    dump = []
    dump.append('[{0}]'.format(self.name))

    printable_bytes = [ord(i) for i in string.printable if i not in string.whitespace]

    # Refer to the __set_format__ method for an explanation
    # of the following construct.
    for keys in self.__keys__:
        for key in keys:
            val = getattr(self, key)
            if isinstance(val, (int, long)):
                val_str = '0x%-8X' % (val)
                if key == 'TimeDateStamp' or key == 'dwTimeStamp':
                    try:
                        val_str += ' [%s UTC]' % time.asctime(time.gmtime(val))
                    except exceptions.ValueError as e:
                        val_str += ' [INVALID TIME]'
            else:
                val_str = bytearray(val)
                val_str = ''.join(
                    [chr(i) if (i in printable_bytes) else
                     '\\x{0:02x}'.format(i) for i in val_str.rstrip(b'\x00')])

            dump.append('0x%-8X 0x%-3X %-30s %s' % (
                self.__field_offsets__[key] + self.__file_offset__,
                self.__field_offsets__[key], key + ':', val_str))

    return dump
def word(allow_empty=True):
    """
    :return: a randomly generated (non-ASCII) word
    :rtype: unicode
    """
    length = random.randint(0 if allow_empty else 1, 10)
    return u"".join(chr(random.randint(256, 512)) for _ in range(length))
def get_string_u_at_rva(self, rva, max_length=2**16):
    """Get an Unicode string located at the given address."""

    try:
        # If the RVA is invalid all would blow up. Some EXEs seem to be
        # specially nasty and have an invalid RVA.
        data = self.get_data(rva, 2)
    except PEFormatError as e:
        return None

    # max_length is the maximum count of 16bit characters
    # needs to be doubled to get size in bytes
    max_length <<= 1

    requested = min(max_length, 256)
    data = self.get_data(rva, requested)
    # try to find null-termination
    null_index = -1
    while True:
        null_index = data.find(b'\x00\x00', null_index + 1)
        if null_index == -1:
            data_length = len(data)
            if data_length < requested or data_length == max_length:
                null_index = len(data) >> 1
                break
            else:
                # Request remaining part of data limited by max_length
                data += self.get_data(rva + data_length, max_length - data_length)
                null_index = requested - 1
                requested = max_length
        elif null_index % 2 == 0:
            null_index >>= 1
            break

    # convert selected part of the string to unicode
    uchrs = struct.unpack('<{:d}H'.format(null_index), data[:null_index * 2])
    s = u''.join(map(chr, uchrs))

    return s.encode('ascii', 'backslashreplace')
def get_unused_char_in_segmentation(segmentation, annotation_key=None):
    """Return a unicode character that does NOT appear in segmentation"""
    global_max = 0
    for segment in segmentation:
        if annotation_key:
            text = segment.annotations.get(
                annotation_key,
                u'__none__',
            )
        else:
            text = segment.get_content()
        local_max = max(ord(c) for c in text)
        if local_max > global_max:
            global_max = local_max
    return chr(global_max + 1)
def decode(self, text, location=None):
    self.doc = pyglet.text.document.FormattedDocument()

    self.length = 0
    self.attributes = {}
    next_trailing_space = True
    trailing_newline = True

    for m in _pattern.finditer(text):
        group = m.lastgroup
        trailing_space = True
        if group == 'text':
            t = m.group('text')
            self.append(t)
            trailing_space = t.endswith(' ')
            trailing_newline = False
        elif group == 'nl_soft':
            if not next_trailing_space:
                self.append(' ')
            trailing_newline = False
        elif group in ('nl_hard1', 'nl_hard2'):
            self.append('\n')
            trailing_newline = True
        elif group == 'nl_para':
            self.append(m.group('nl_para')[1:])  # ignore the first \n
            trailing_newline = True
        elif group == 'attr':
            try:
                ast = parser.expr(m.group('attr_val'))
                if self.safe(ast):
                    val = eval(ast.compile())
                else:
                    val = None
            except (parser.ParserError, SyntaxError):
                val = None
            name = m.group('attr_name')
            if name[0] == '.':
                if trailing_newline:
                    self.attributes[name[1:]] = val
                else:
                    self.doc.set_paragraph_style(self.length, self.length,
                                                 {name[1:]: val})
            else:
                self.attributes[name] = val
        elif group == 'escape_dec':
            self.append(chr(int(m.group('escape_dec_val'))))
        elif group == 'escape_hex':
            self.append(chr(int(m.group('escape_hex_val'), 16)))
        elif group == 'escape_lbrace':
            self.append('{')
        elif group == 'escape_rbrace':
            self.append('}')
        next_trailing_space = trailing_space

    return self.doc
def dump(self, indentation=0):
    """Returns a string representation of the structure."""

    dump = []
    dump.append('[{0}]'.format(self.name))

    printable_bytes = [ord(i) for i in string.printable if i not in string.whitespace]

    # Refer to the __set_format__ method for an explanation
    # of the following construct.
    for keys in self.__keys__:
        for key in keys:
            val = getattr(self, key)
            if isinstance(val, (int, long)):
                val_str = '0x%-8X' % (val)
                if key == 'TimeDateStamp' or key == 'dwTimeStamp':
                    try:
                        val_str += ' [%s UTC]' % time.asctime(time.gmtime(val))
                    except exceptions.ValueError as e:
                        val_str += ' [INVALID TIME]'
            else:
                val_str = val
                try:
                    val_str = val_str.encode('ascii', 'backslashreplace')
                except:
                    pass
                if isinstance(val_str[0], int):
                    val_str = ''.join(
                        [chr(i) if (i in printable_bytes) else
                         '\\x{0:02x}'.format(i) for i in val_str.rstrip(b'\x00')])
                else:
                    val_str = ''.join(
                        [i if (ord(i) in printable_bytes) else
                         '\\x{0:02x}'.format(ord(i)) for i in val_str.rstrip(b'\x00')])

            dump.append('0x%-8X 0x%-3X %-30s %s' % (
                self.__field_offsets__[key] + self.__file_offset__,
                self.__field_offsets__[key], key + ':', val_str))

    return dump
def get_string_u_at_rva(self, rva, max_length=2**16, encoding=None):
    """Get an Unicode string located at the given address."""

    try:
        # If the RVA is invalid all would blow up. Some EXEs seem to be
        # specially nasty and have an invalid RVA.
        data = self.get_data(rva, 2)
    except PEFormatError as e:
        return None

    # max_length is the maximum count of 16bit characters
    # needs to be doubled to get size in bytes
    max_length <<= 1

    requested = min(max_length, 256)
    data = self.get_data(rva, requested)
    # try to find null-termination
    null_index = -1
    while True:
        null_index = data.find(b'\x00\x00', null_index + 1)
        if null_index == -1:
            data_length = len(data)
            if data_length < requested or data_length == max_length:
                null_index = len(data) >> 1
                break
            else:
                # Request remaining part of data limited by max_length
                data += self.get_data(rva + data_length, max_length - data_length)
                null_index = requested - 1
                requested = max_length
        elif null_index % 2 == 0:
            null_index >>= 1
            break

    # convert selected part of the string to unicode
    uchrs = struct.unpack('<{:d}H'.format(null_index), data[:null_index * 2])
    s = u''.join(map(chr, uchrs))

    if encoding:
        return b(s.encode(encoding, 'backslashreplace'))

    return b(s.encode('utf-8', 'backslashreplace'))
def generate_checksum(self):
    # This will make sure that the data representing the PE image
    # is updated with any changes that might have been made by
    # assigning values to header fields as those are not automatically
    # updated upon assignment.
    #
    # data = self.write()
    # print('{0}'.format(len(data)))
    # for idx, b in enumerate(data):
    #     if b != ord(self.__data__[idx]) or (idx > 1244440 and idx < 1244460):
    #         print('Idx: {0} G {1:02x} {3} B {2:02x}'.format(
    #             idx, ord(self.__data__[idx]), b,
    #             self.__data__[idx], chr(b)))
    self.__data__ = self.write()

    # Get the offset to the CheckSum field in the OptionalHeader
    # (The offset is the same in PE32 and PE32+)
    checksum_offset = self.OPTIONAL_HEADER.get_file_offset() + 0x40  # 64

    checksum = 0
    # Verify the data is dword-aligned. Add padding if needed
    #
    remainder = len(self.__data__) % 4
    data_len = len(self.__data__) + ((4 - remainder) * (remainder != 0))

    for i in range(int(data_len / 4)):
        # Skip the checksum field
        if i == int(checksum_offset / 4):
            continue
        if i + 1 == (int(data_len / 4)) and remainder:
            dword = struct.unpack('I', self.__data__[i*4:] + ('\0' * (4 - remainder)))[0]
        else:
            dword = struct.unpack('I', self.__data__[i*4: i*4+4])[0]
        # Optimized the calculation (thanks to Emmanuel Bourg for pointing it out!)
        checksum += dword
        if checksum >= 2**32:
            checksum = (checksum & 0xffffffff) + (checksum >> 32)

    checksum = (checksum & 0xffff) + (checksum >> 16)
    checksum = (checksum) + (checksum >> 16)
    checksum = checksum & 0xffff

    # The length is the one of the original data, not the padded one
    #
    return checksum + len(self.__data__)