我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用locale.getpreferredencoding()。
def decode_string(string, encoding=None):
    """Decode string back to unicode.

    Decodes the given byte string back to unicode. Characters that the
    encoding cannot decode are silently dropped. A value that is already
    text is returned unchanged.

    Args:
        string (string): String to decode.

        encoding (string): Encoding to use during the decode process. If no
            encoding is given then the locale's preferred encoding is used.

    Returns:
        The decoded text.

    """
    # ``type(u"")`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
    # this check does not depend on the Python-2-only ``unicode`` builtin.
    if isinstance(string, type(u"")):
        return string

    if encoding is None:
        encoding = locale.getpreferredencoding()

    return string.decode(encoding, "ignore")
def read_text_file(filename):
    """Read *filename* and return its contents as text.

    The raw bytes are decoded with the first codec that succeeds out of:
    utf-8, the locale's preferred encoding (queried without changing the
    locale) and latin1. latin1 maps every byte to a character, so the last
    attempt cannot fail; at worst the result contains some wrong characters.
    """
    with open(filename, 'rb') as handle:
        raw = handle.read()

    text = raw
    for codec in ('utf-8', locale.getpreferredencoding(False), 'latin1'):
        try:
            text = raw.decode(codec)
        except UnicodeDecodeError:
            pass
        else:
            break

    # latin1 is the final candidate, so a decode must have succeeded.
    assert not isinstance(text, bytes)
    return text
def version_getter(config):
    """Return the tag on HEAD, or HEAD's short SHA1 when it is untagged.

    Returns ``None`` when the current directory is not inside a git work
    tree. The *config* argument is not used by this function.

    .. note::

        Only annotated tags are considered.

    TODO: Support non-annotated tags?
    """
    work_tree_probe = ['git', 'rev-parse', '--is-inside-work-tree']
    exact_tag_query = ['git', 'describe', '--exact-match']
    short_sha_query = ['git', 'rev-parse', '--short', 'HEAD']

    try:
        check_output(work_tree_probe, stderr=DEVNULL)
    except CalledProcessError:
        return None

    codec = getpreferredencoding(do_setlocale=False)

    try:
        raw_version = check_output(exact_tag_query, stderr=DEVNULL)
    except CalledProcessError:
        # No exact tag on HEAD: use the abbreviated commit hash instead.
        raw_version = check_output(short_sha_query)

    return raw_version.decode(codec).strip()
def read_text_file(filename):
    """Return the decoded contents of *filename*.

    Decoding is attempted with utf-8, then the preferred system encoding
    (e.g., cp1252 on some Windows machines), then latin1. Because latin1
    accepts any byte sequence, the final attempt never raises; in the worst
    case the returned string contains some garbage characters.
    """
    with open(filename, 'rb') as stream:
        payload = stream.read()

    candidates = ('utf-8', locale.getpreferredencoding(False), 'latin1')
    decoded = payload
    for name in candidates:
        try:
            decoded = payload.decode(name)
            break
        except UnicodeDecodeError:
            continue

    assert not isinstance(decoded, bytes)  # latin1 cannot fail
    return decoded
def decode_as_string(text, encoding=None):
    """
    Decode console or file output and normalize it to composed NFC form.

    The text parameter should be an encoded (byte) string; if it is already
    text, no decoding occurs and it is only normalized.

    If no encoding is given, the module-level ``_console_encoding`` is used.
    If encoding is specified, that is used instead. This would be needed
    for SVN --xml output, which should be UTF-8 (SVN Issue 2938); the
    discussion on the Subversion DEV List from 2007 seems to indicate the
    same.

    Returns the decoded, NFC-normalized text.
    """
    if encoding is None:
        encoding = _console_encoding

    # ``type(u"")`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
    # this check does not depend on the Python-2-only ``unicode`` builtin.
    if not isinstance(text, type(u"")):
        text = text.decode(encoding)

    return unicodedata.normalize('NFC', text)
def main(args=None):
    """Parse the command line and run the selected command.

    *args* defaults to ``sys.argv[1:]``. If option parsing raises
    ``PipError``, the error is written to stderr and the process exits with
    status 1. Otherwise the result of the command's ``main`` is returned.
    """
    args = sys.argv[1:] if args is None else args

    # Route our deprecation warnings through the loggers.
    deprecation.install_warning_logger()

    autocomplete()

    try:
        cmd_name, cmd_args = parseopts(args)
    except PipError as error:
        sys.stderr.write("ERROR: %s" % error)
        sys.stderr.write(os.linesep)
        sys.exit(1)

    # locale.getpreferredencoding(False), used in
    # pip._internal.utils.encoding.auto_decode, needs the locale set first.
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error as locale_error:
        # setlocale can apparently crash if locales are uninitialized
        logger.debug("Ignoring error %s when setting locale", locale_error)

    command_factory = commands_dict[cmd_name]
    command = command_factory(isolated=check_isolated(cmd_args))
    return command.main(cmd_args)
def force_unicode(s):
    """Do all kinds of magic to turn the byte string `s` into unicode.

    Encodings are tried in this order:

    1. UTF-8,
    2. the locale's preferred encoding,
    3. the encoding guessed by ``chardet``, when that module is available
       (i.e. the module-level ``chardet`` name is not ``None``).

    Returns the decoded text, or the translated message
    ``'Unicode conversion error'`` when every attempt fails.
    """
    try:
        return s.decode('utf-8')
    except UnicodeDecodeError:
        pass

    try:
        return s.decode(locale.getpreferredencoding())
    except (UnicodeDecodeError, LookupError):
        # LookupError covers a locale codec name Python does not know.
        pass

    if chardet is not None:
        # Try chardet, if available
        encoding = chardet.detect(s)['encoding']
        if encoding is not None:
            try:
                return s.decode(encoding)
            except (UnicodeDecodeError, LookupError):
                pass

    # Give up: keep the existing contract of returning an error message.
    return _('Unicode conversion error')
def lnpgettext(self, context, singular, plural, num):
    """Like ``npgettext()``, but return the translation as encoded bytes.

    The translation is encoded with the output charset set on this object
    when there is one, otherwise with the preferred system encoding.

    When the catalog has no entry for the message, the lookup is delegated
    to the fallback if one is set; failing that, *singular* is returned for
    ``num == 1`` and *plural* for any other count.
    """
    lookup_id = self.CONTEXT_ENCODING % (context, singular)
    try:
        translated = self._catalog[(lookup_id, self.plural(num))]
        charset = self._output_charset
        if not charset:
            charset = locale.getpreferredencoding()
        return translated.encode(charset)
    except KeyError:
        pass

    if self._fallback:
        return self._fallback.lnpgettext(context, singular, plural, num)
    return singular if num == 1 else plural
def decode_windows_line(self, text):
    """Decode the byte string *text* using the console's active code page.

    The code page is obtained by running ``chcp`` in a shell and taking the
    last space-separated token of its output.
    """
    # Only needed on Windows, so the imports stay local.
    import locale
    import subprocess

    # STDERR gets the wrong encoding, so ask chcp for the real one.
    chcp_proc = subprocess.Popen(["chcp"], shell=True, stdout=subprocess.PIPE)
    raw_output = chcp_proc.communicate()[0]

    # chcp's own output is decoded with the locale preferred encoding
    # (for example 'cp1251'); surrounding whitespace/newlines are removed.
    report = raw_output.decode(locale.getpreferredencoding()).strip()

    # The code page number is the last token of the report.
    code_page = report.split(" ")[-1]

    return text.decode("cp" + code_page)
def guess_decode(text):
    """Decode *text* with guessed encoding.

    First try UTF-8; this should fail for non-UTF-8 encodings. Then try the
    preferred locale encoding. Fall back to latin-1, which always works.

    Returns a ``(decoded_text, encoding_name)`` tuple, where
    ``encoding_name`` is the codec that actually produced ``decoded_text``.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass

    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        # Decode with the locale encoding itself; a bare ``decode()`` would
        # use the interpreter default instead of the encoding reported back.
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        return text.decode('latin1'), 'latin1'
def decode(bytes):
    """Turn a byte string into text, preferring utf8.

    An empty or falsy argument yields ``''``. If the data is not valid
    utf8, it is decoded with the system's preferred encoding instead, with
    undecodable bytes replaced. The fallback exists because javac emits
    CP1252 output on Windows even when the input encoding is set to UTF8.
    """
    if bytes:
        try:
            return bytes.decode('utf8')
        except UnicodeError:
            fallback = locale.getpreferredencoding()
            return bytes.decode(fallback, errors='replace')
    return ''
def run_terminal(self, dir_, parameters):
    """Launch the selected terminal with *dir_* as its working directory.

    Every ``%CWD%`` placeholder in *parameters* is replaced by *dir_*; the
    list is updated in place. An ``OSError`` is printed and reported as the
    terminal not being found; any other exception is reported with its
    message through ``sublime.error_message``.
    """
    try:
        if not dir_:
            raise NotFoundError('The file open in the selected view has ' +
                                'not yet been saved')

        # Substitute the working directory into the caller's list in place.
        for index, param in enumerate(parameters):
            parameters[index] = param.replace('%CWD%', dir_)

        command = [TerminalSelector.get()] + list(parameters)

        encoding = locale.getpreferredencoding(do_setlocale=True)
        # On Python 2 the working directory is passed as encoded bytes.
        on_py3 = sys.version_info >= (3,)
        working_dir = dir_ if on_py3 else dir_.encode(encoding)

        subprocess.Popen(command, cwd=working_dir)
    except OSError as error:
        print(str(error))
        sublime.error_message('Terminal: The terminal ' +
                              TerminalSelector.get() + ' was not found')
    except Exception as error:
        sublime.error_message('Terminal: ' + str(error))
def main(args=None):
    """Parse the command line and run the selected command.

    *args* defaults to ``sys.argv[1:]``. If option parsing raises
    ``PipError``, the error is written to stderr and the process exits with
    status 1. Otherwise the result of the command's ``main`` is returned.
    """
    import logging

    if args is None:
        args = sys.argv[1:]

    # Configure our deprecation warnings to be sent through loggers
    deprecation.install_warning_logger()

    autocomplete()

    try:
        cmd_name, cmd_args = parseopts(args)
    except PipError as exc:
        sys.stderr.write("ERROR: %s" % exc)
        sys.stderr.write(os.linesep)
        sys.exit(1)

    # Needed for locale.getpreferredencoding(False) to work
    # in pip.utils.encoding.auto_decode
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error as exc:
        # setlocale raises locale.Error for an unsupported or uninitialized
        # locale; that must not abort the command, so log it and carry on.
        logging.getLogger(__name__).debug(
            "Ignoring error %s when setting locale", exc)

    command = commands_dict[cmd_name](isolated=check_isolated(cmd_args))
    return command.main(cmd_args)


# ###########################################################
# # Writing freeze files
def _get_text_writer(stream, errors): # In python3, all the sys.stdout/sys.stderr streams are in text # mode. This means they expect unicode, and will encode the # unicode automatically before actually writing to stdout/stderr. # In python2, that's not the case. In order to provide a consistent # interface, we can create a wrapper around sys.stdout that will take # unicode, and automatically encode it to the preferred encoding. # That way consumers can just call get_text_writer(stream) and write # unicode to the returned stream. Note that get_text_writer # just returns the stream in the PY3 section above because python3 # handles this. # We're going to use the preferred encoding, but in cases that there is # no preferred encoding we're going to fall back to assuming ASCII is # what we should use. This will currently break the use of # PYTHONIOENCODING, which would require checking stream.encoding first, # however, the existing behavior is to only use # locale.getpreferredencoding() and so in the hope of not breaking what # is currently working, we will continue to only use that. encoding = locale.getpreferredencoding() if encoding is None: encoding = "ascii" return codecs.getwriter(encoding)(stream, errors)
def _unicode(string, encoding=None):
    """Best-effort conversion of *string* to cleaned-up unicode text.

    Byte strings are decoded (undecodable bytes are ignored) with the first
    encoding that is not ``None`` out of: the *encoding* argument,
    ``sys.stdin.encoding``, the locale's preferred encoding. Unicode input
    is used as is, and any other object (e.g. a number) is converted with
    ``compat.unicode``. NUL characters are removed and surrounding
    whitespace is stripped from the result.
    """
    if isinstance(string, compat.unicode):
        text = string
    elif not isinstance(string, compat.bytes):
        text = compat.unicode(string)
    else:
        codec = encoding
        if codec is None:
            codec = sys.stdin.encoding
        if codec is None:
            codec = locale.getpreferredencoding()
        text = string.decode(codec, "ignore")

    return text.replace('\x00', '').strip()
def repl():
    """Interactive loop: read a query, then print it and its parse tree.

    The loop ends on a keyboard interrupt or as soon as the entered text
    contains "quit".
    """
    import locale

    while True:
        try:
            sys.stdout.write("Type in next query: \n> ")
            raw_line = raw_input()
            # Decode with stdin's encoding, or the locale's when stdin has none.
            query_str = raw_line.decode(
                sys.stdin.encoding or locale.getpreferredencoding(True))
        except KeyboardInterrupt:
            return

        if u'quit' in query_str:
            return

        print_query_and_parse_tree(query_str)
def pager_print(expr, **settings):
    """Show the pretty form of expr through pydoc's pager.

    Lines are not wrapped automatically, so this works best with a pager
    that can scroll sideways, such as ``less -S``.

    The keyword settings are the same as for ``pretty_print``. To wrap
    lines, pass ``num_columns=None`` so that the terminal width is
    auto-detected.
    """
    from locale import getpreferredencoding
    from pydoc import pager

    # A huge default column count effectively disables line wrapping.
    settings.setdefault('num_columns', 500000)

    rendered = pretty(expr, **settings)
    pager(rendered.encode(getpreferredencoding()))