Python locale 模块,nl_langinfo() 实例源码
我们从Python开源项目中,提取了以下30个代码示例,用于说明如何使用locale.nl_langinfo()。
def _weekdaytext(day, plural=False):
    """
    Return the locale's name for a weekday.

    ``day`` may be an ``rrule.weekday`` (its ordinal ``n``, when set, is
    rendered through ``_ordinal`` and prefixed), an integer used directly
    as the ``DAY_<n>`` index of the locale module, or a string that is
    looked up in ``DAYS``.  Any other type produces an empty string.
    When ``plural`` is true a literal "s" is appended to the day name.
    """
    pieces = []
    index = None
    if isinstance(day, rrule.weekday):
        if day.n:
            pieces += [_ordinal(day.n), ' ']
        # Shift by two and wrap 8 back to 1; presumably this maps the
        # rrule numbering onto the locale's DAY_1..DAY_7 -- TODO confirm.
        shifted = day.weekday + 2
        index = 1 if shifted == 8 else shifted
    elif isinstance(day, int):
        index = day
    elif isinstance(day, basestring):
        index = DAYS.index(day)

    if index is not None:
        name = locale.nl_langinfo(getattr(locale, 'DAY_%s' % index))
        pieces.append("%ss" % name if plural else name)
    return ''.join(pieces)
def set_default_encoding():
    """
    Set the interpreter-wide default encoding from the user's locale.

    The libc locale is initialised from the environment, falling back to
    ``en_US.UTF-8`` when that fails.  The encoding is then taken from
    ``locale.getdefaultlocale()`` or, failing that, from
    ``nl_langinfo(CODESET)``.  Missing, unknown and ASCII encodings are
    replaced by UTF-8.  Finally ``sys.setdefaultencoding`` is called and
    removed from ``sys``.

    Raises ``locale.Error`` if the ``en_US.UTF-8`` fallback locale cannot
    be set either.
    """
    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        # Only a locale failure should trigger the fallback; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        print ('WARNING: Failed to set default libc locale, using en_US.UTF-8')
        locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')

    try:
        enc = locale.getdefaultlocale()[1]
    except Exception:
        enc = None
    if not enc:
        enc = locale.nl_langinfo(locale.CODESET)
    if not enc:
        enc = 'UTF-8'

    try:
        enc = codecs.lookup(enc).name
    except LookupError:
        enc = 'UTF-8'
    # Check for ASCII only after normalising the codec name, so aliases
    # such as 'ANSI_X3.4-1968' or 'US-ASCII' are upgraded to UTF-8 as well.
    if enc == 'ascii':
        enc = codecs.lookup('UTF-8').name

    sys.setdefaultencoding(enc)
    del sys.setdefaultencoding
def __str__(self):
    """
    Render the date as "<day> <abbreviated month> <year> <era>".

    Components whose attribute is ``None`` are left out.  The month name
    comes from the locale (``ABMON_<n>``); the year is shown as an
    absolute value followed by "BCE" for negative years and "CE" for
    positive ones (year 0 gets no era).  Note that the process locale is
    (re)initialised from the environment on every call.
    """
    import locale
    locale.setlocale(locale.LC_ALL, '')

    day, month, year = self.day, self.month, self.year
    parts = []
    if day is not None:
        parts.append(str(day))
    if month is not None:
        month_item = getattr(locale, 'ABMON_%s' % month)
        parts.append(locale.nl_langinfo(month_item))
    if year is not None:
        parts.append(str(abs(year)))
    if year:
        parts.append('BCE' if year < 0 else 'CE')
    return " ".join(parts)
def _monthtext(month):
    """
    Return the locale's full name for a month.

    Values below 1 are treated as 1 and values above 12 as 12 before the
    ``MON_<n>`` item is looked up.
    """
    clamped = 1 if month < 1 else (12 if month > 12 else month)
    return locale.nl_langinfo(getattr(locale, 'MON_%s' % clamped))
def _getTerminalCharset():
    """
    Work out the terminal charset; helper for getTerminalCharset().

    The first non-empty answer wins, trying in order
    locale.getpreferredencoding(), locale.nl_langinfo(CODESET) and
    sys.stdout.encoding.  "ASCII" is returned when none of them yields
    a value.

    @see getTerminalCharset()
    """
    # (1) and (2): locale based probes; a probe that is unavailable or
    # fails with a locale error is simply skipped.
    locale_probes = (
        lambda: locale.getpreferredencoding(),
        lambda: locale.nl_langinfo(locale.CODESET),
    )
    for probe in locale_probes:
        try:
            found = probe()
        except (locale.Error, AttributeError):
            continue
        if found:
            return found

    # (3) Encoding advertised by the standard output stream, if any
    stream_encoding = getattr(sys.stdout, "encoding", None)
    if stream_encoding:
        return stream_encoding

    # (4) Last resort
    return "ASCII"
def getTerminalCharset():
    """
    Guess the terminal charset, computing it only once.

    The first call delegates to _getTerminalCharset() (which tries
    locale.getpreferredencoding(), locale.nl_langinfo(CODESET),
    sys.stdout.encoding and finally falls back to "ASCII") and stores the
    answer on the function object; later calls return the stored value.

    WARNING: Call initLocale() before calling this function.
    """
    if not hasattr(getTerminalCharset, "value"):
        getTerminalCharset.value = _getTerminalCharset()
    return getTerminalCharset.value
def print_formatted(self, fp=sys.stdout, no_color=False,
                    show_cmd=False, show_user=False, show_pid=False,
                    gpuname_width=16,
                    ):
    """
    Write a header line followed by one line per entry of ``self`` to ``fp``.

    The header shows ``self.hostname`` and ``self.query_time`` formatted
    with the locale's date-and-time format.  ``no_color`` selects a
    ``Terminal(force_styling=None)`` for the header instead of the
    module-level ``term`` and is passed on (inverted) as ``with_colors``.
    The name column is at least ``gpuname_width`` wide and grows to the
    longest ``entry['name']``.  ``fp`` is flushed at the end.
    """
    # header
    time_format = locale.nl_langinfo(locale.D_T_FMT)
    hostname = self.hostname
    timestr = self.query_time.strftime(time_format)
    styler = Terminal(force_styling=None) if no_color else term
    fp.write('{t.bold_white}{hostname}{t.normal} {timestr}'.format(
        hostname=hostname, timestr=timestr, t=styler))
    fp.write('\n')

    # body
    name_lengths = [len(g.entry['name']) for g in self]
    gpuname_width = max([gpuname_width] + name_lengths)
    row_options = dict(
        with_colors=not no_color,
        show_cmd=show_cmd,
        show_user=show_user,
        show_pid=show_pid,
        gpuname_width=gpuname_width,
    )
    for g in self:
        g.print_to(fp, **row_options)
        fp.write('\n')

    fp.flush()
def _getTerminalCharset():
    """
    Work out the terminal charset; helper for getTerminalCharset().

    The first non-empty answer wins, trying in order
    locale.getpreferredencoding(), locale.nl_langinfo(CODESET) and
    sys.stdout.encoding.  "ASCII" is returned when none of them yields
    a value.

    @see getTerminalCharset()
    """
    # (1) and (2): locale based probes; a probe that is unavailable or
    # fails with a locale error is simply skipped.
    locale_probes = (
        lambda: locale.getpreferredencoding(),
        lambda: locale.nl_langinfo(locale.CODESET),
    )
    for probe in locale_probes:
        try:
            found = probe()
        except (locale.Error, AttributeError):
            continue
        if found:
            return found

    # (3) Encoding advertised by the standard output stream, if any
    stream_encoding = getattr(sys.stdout, "encoding", None)
    if stream_encoding:
        return stream_encoding

    # (4) Last resort
    return "ASCII"
def getTerminalCharset():
    """
    Guess the terminal charset, computing it only once.

    The first call delegates to _getTerminalCharset() (which tries
    locale.getpreferredencoding(), locale.nl_langinfo(CODESET),
    sys.stdout.encoding and finally falls back to "ASCII") and stores the
    answer on the function object; later calls return the stored value.

    WARNING: Call initLocale() before calling this function.
    """
    if not hasattr(getTerminalCharset, "value"):
        getTerminalCharset.value = _getTerminalCharset()
    return getTerminalCharset.value
def date_and_time_formatting_information():
    """Print the locale's datetime, date and time format strings."""
    print('Date and time formatting:')
    labelled_options = (
        ('datetime format (D_T_FMT)', locale.D_T_FMT),
        ('date format (D_FMT)', locale.D_FMT),
        ('time format (T_FMT)', locale.T_FMT),
    )
    for label, option in labelled_options:
        value = locale.nl_langinfo(option)
        print('\tOption: {0}, value: {1}'.format(label, value))
def day_names():
    """Print the full and abbreviated locale name of each of the 7 days."""
    print('Day names (full and abbreviated):')
    for day_num in range(1, 8):
        full_item = getattr(locale, 'DAY_{0}'.format(day_num))
        short_item = getattr(locale, 'ABDAY_{0}'.format(day_num))
        full_name = locale.nl_langinfo(full_item)
        short_name = locale.nl_langinfo(short_item)
        print('\tDay: {0}, name: {1}, abbreviated: {2}'.format(
            day_num, full_name, short_name))
def month_names():
    """Print the full and abbreviated locale name of each of the 12 months."""
    print('Month names (full and abbreviated):')
    for mon_num in range(1, 13):
        full_item = getattr(locale, 'MON_{0}'.format(mon_num))
        short_item = getattr(locale, 'ABMON_{0}'.format(mon_num))
        full_name = locale.nl_langinfo(full_item)
        short_name = locale.nl_langinfo(short_item)
        print('\tMonth: {0}, name: {1}, abbreviated: {2}'.format(
            mon_num, full_name, short_name))
def confirm(self, msg):
    """
    Ask the user a yes/no question and return the answer as a bool.

    Returns True straight away when ``ctx.config.options.yes_all`` is set.
    Otherwise the message, followed by a coloured " (yes/no)" hint, is
    shown as a prompt and the reply is matched against the locale's
    YESEXPR pattern: True on a match, False otherwise.
    """
    msg = unicode(msg)
    options = ctx.config.options
    if options and options.yes_all:
        return True

    import re
    yes_pattern = re.compile(locale.nl_langinfo(locale.YESEXPR))
    prompt = msg + colorize(_(' (yes/no)'), 'red')
    answer = raw_input(prompt.encode('utf-8'))
    return True if yes_pattern.search(answer) else False
def get_month_choices():
    """
    Build the list of month choices using the locale.

    Returns a list of ``(value, label)`` pairs: ``('00', 'No month')``
    followed by ``'01'``..``'12'`` paired with the locale's abbreviated
    month names.  The process locale is initialised from the environment
    as a side effect.
    """
    import locale
    locale.setlocale(locale.LC_ALL, '')
    months = [
        ("%02d" % number,
         locale.nl_langinfo(getattr(locale, 'ABMON_%s' % number)))
        for number in range(1, 13)
    ]
    return [('00', 'No month')] + months
def main(use_pygame_console=0, interactmethod=default_interactmethod, print_banner=True, clear_main=True):
    """
    Run an interactive console session on the current terminal.

    use_pygame_console -- accepted but currently ignored (that branch is
        disabled with ``if 0``).
    interactmethod -- name of the ReaderConsole method that runs the loop.
    print_banner -- print the Python version banner first.
    clear_main -- when this module is not ``__main__`` itself, install a
        fresh, empty ``__main__`` module as the session namespace.

    The console encoding is taken from ``locale.nl_langinfo(CODESET)``
    when available.  ``sys.stdin``/``stdout``/``stderr`` are restored on
    exit.
    """
    # imp.new_module() is gone in current Python versions; ModuleType is
    # the supported way to create an empty module object.
    import types

    si, se, so = sys.stdin, sys.stderr, sys.stdout
    try:
        if 0 and use_pygame_console: # pygame currently borked
            from pyrepl.pygame_console import PyGameConsole, FakeStdin, FakeStdout
            con = PyGameConsole()
            sys.stderr = sys.stdout = FakeStdout(con)
            sys.stdin = FakeStdin(con)
        else:
            from pyrepl.unix_console import UnixConsole
            try:
                import locale
            except ImportError:
                encoding = None
            else:
                if hasattr(locale, 'nl_langinfo') \
                       and hasattr(locale, 'CODESET'):
                    encoding = locale.nl_langinfo(locale.CODESET)
                elif os.environ.get('TERM_PROGRAM') == 'Apple_Terminal':
                    # /me whistles innocently...
                    code = int(os.popen(
                        "defaults read com.apple.Terminal StringEncoding"
                        ).read())
                    if code == 4:
                        encoding = 'utf-8'
                        # More could go here -- and what's here isn't
                        # bulletproof.  What would be?  AppleScript?
                        # Doesn't seem to be possible.
                    else:
                        encoding = None
                else:
                    encoding = None # so you get ASCII...
            con = UnixConsole(os.dup(0), os.dup(1), None, encoding)
        if print_banner:
            print("Python", sys.version, "on", sys.platform)
            print('Type "help", "copyright", "credits" or "license" '\
                  'for more information.')
        sys.path.insert(0, os.getcwd())

        if clear_main and __name__ != '__main__':
            mainmod = types.ModuleType('__main__')
            sys.modules['__main__'] = mainmod
        else:
            mainmod = sys.modules['__main__']

        rc = ReaderConsole(con, mainmod.__dict__)
        rc.reader._module_list_ready = False
        rc.run_user_init_file()
        getattr(rc, interactmethod)()
    finally:
        # Always hand the original streams back, even if the session died.
        sys.stdin, sys.stderr, sys.stdout = si, se, so
def test():
    """Simple interactive test program for the file dialogs.

    Shows the load/save dialog classes and then the askopenfilename /
    asksaveasfilename helpers, printing each result.  The names from the
    helpers are printed encoded with the locale's codeset (UTF-8 when it
    cannot be determined).
    """
    root = Tk()
    root.withdraw()
    fd = LoadFileDialog(root)
    loadfile = fd.go(key="test")
    fd = SaveFileDialog(root)
    savefile = fd.go(key="test")
    print(loadfile, savefile)

    # Since the file name may contain non-ASCII characters, we need
    # to find an encoding that likely supports the file name, and
    # displays correctly on the terminal.

    # Start off with UTF-8
    enc = "utf-8"

    # See whether CODESET is defined
    try:
        import locale
        locale.setlocale(locale.LC_ALL,'')
        enc = locale.nl_langinfo(locale.CODESET)
    except (ImportError, AttributeError):
        pass

    # dialog for opening files
    openfilename=askopenfilename(filetypes=[("all files", "*")])
    try:
        # Only checks that the file can be opened; the context manager
        # guarantees the handle is closed again.
        with open(openfilename,"r"):
            pass
    except Exception as exc:
        # Report any ordinary failure, but let KeyboardInterrupt and
        # SystemExit through (a bare except would swallow them).
        print("Could not open File: ")
        print(exc)

    print("open", openfilename.encode(enc))

    # dialog for saving files
    saveasfilename=asksaveasfilename()
    print("saveas", saveasfilename.encode(enc))
def test():
    """Simple interactive test program for the file dialogs.

    Shows the load/save dialog classes and then the askopenfilename /
    asksaveasfilename helpers, printing each result.  The names from the
    helpers are printed encoded with the locale's codeset (UTF-8 when it
    cannot be determined).
    """
    root = Tk()
    root.withdraw()
    fd = LoadFileDialog(root)
    loadfile = fd.go(key="test")
    fd = SaveFileDialog(root)
    savefile = fd.go(key="test")
    print(loadfile, savefile)

    # Since the file name may contain non-ASCII characters, we need
    # to find an encoding that likely supports the file name, and
    # displays correctly on the terminal.

    # Start off with UTF-8
    enc = "utf-8"

    # See whether CODESET is defined
    try:
        import locale
        locale.setlocale(locale.LC_ALL,'')
        enc = locale.nl_langinfo(locale.CODESET)
    except (ImportError, AttributeError):
        pass

    # dialog for opening files
    openfilename=askopenfilename(filetypes=[("all files", "*")])
    try:
        # Only checks that the file can be opened; the context manager
        # guarantees the handle is closed again.
        with open(openfilename,"r"):
            pass
    except Exception as exc:
        # Report any ordinary failure, but let KeyboardInterrupt and
        # SystemExit through (a bare except would swallow them).
        print("Could not open File: ")
        print(exc)

    print("open", openfilename.encode(enc))

    # dialog for saving files
    saveasfilename=asksaveasfilename()
    print("saveas", saveasfilename.encode(enc))
def test():
    """Simple interactive test program for the file dialogs.

    Shows the load/save dialog classes and then the askopenfilename /
    asksaveasfilename helpers, printing each result.  The names from the
    helpers are printed encoded with the locale's codeset (UTF-8 when it
    cannot be determined).
    """
    root = Tk()
    root.withdraw()
    fd = LoadFileDialog(root)
    loadfile = fd.go(key="test")
    fd = SaveFileDialog(root)
    savefile = fd.go(key="test")
    print(loadfile, savefile)

    # Since the file name may contain non-ASCII characters, we need
    # to find an encoding that likely supports the file name, and
    # displays correctly on the terminal.

    # Start off with UTF-8
    enc = "utf-8"

    # See whether CODESET is defined
    try:
        import locale
        locale.setlocale(locale.LC_ALL,'')
        enc = locale.nl_langinfo(locale.CODESET)
    except (ImportError, AttributeError):
        pass

    # dialog for opening files
    openfilename=askopenfilename(filetypes=[("all files", "*")])
    try:
        # Only checks that the file can be opened; the context manager
        # guarantees the handle is closed again.
        with open(openfilename,"r"):
            pass
    except Exception as exc:
        # Report any ordinary failure, but let KeyboardInterrupt and
        # SystemExit through (a bare except would swallow them).
        print("Could not open File: ")
        print(exc)

    print("open", openfilename.encode(enc))

    # dialog for saving files
    saveasfilename=asksaveasfilename()
    print("saveas", saveasfilename.encode(enc))
def guess_encoding(data):
    """
    Decode a byte string by trying a series of candidate encodings.

    'utf-8' is tried first, then whatever the locale reports
    (``nl_langinfo(CODESET)``, ``getlocale()``, ``getdefaultlocale()``),
    and 'latin-1' last.

    The calling program *must* first call::

        locale.setlocale(locale.LC_ALL, '')

    Returns ``(decoded_unicode, successful_encoding)`` for the first
    candidate that works; raises ``UnicodeError`` if none does.
    """
    # 'utf-8' always heads the list
    candidates = ['utf-8']

    # Locale-derived candidates; each probe is optional and is skipped
    # when the platform does not provide it.
    try:
        candidates.append(locale.nl_langinfo(locale.CODESET))
    except AttributeError:
        pass
    try:
        candidates.append(locale.getlocale()[1])
    except (AttributeError, IndexError):
        pass
    try:
        candidates.append(locale.getdefaultlocale()[1])
    except (AttributeError, IndexError):
        pass

    # 'latin-1' closes the list
    candidates.append('latin-1')

    for codec in candidates:
        # some of the locale calls may have returned None
        if not codec:
            continue
        try:
            return (text_type(data, codec), codec)
        except (UnicodeError, LookupError):
            continue

    raise UnicodeError(
        'Unable to decode input data. Tried the following encodings: %s.'
        % ', '.join([repr(codec) for codec in candidates if codec]))
##########################################################################
# Remove repeated elements from a list deterministically
##########################################################################
def guess_encoding(data):
    """
    Decode a byte string by trying a series of candidate encodings.

    'utf-8' is tried first, then whatever the locale reports
    (``nl_langinfo(CODESET)``, ``getlocale()``, ``getdefaultlocale()``),
    and 'latin-1' last.

    The calling program *must* first call::

        locale.setlocale(locale.LC_ALL, '')

    Returns ``(decoded_unicode, successful_encoding)`` for the first
    candidate that works; raises ``UnicodeError`` if none does.
    """
    # 'utf-8' always heads the list
    candidates = ['utf-8']

    # Locale-derived candidates; each probe is optional and is skipped
    # when the platform does not provide it.
    try:
        candidates.append(locale.nl_langinfo(locale.CODESET))
    except AttributeError:
        pass
    try:
        candidates.append(locale.getlocale()[1])
    except (AttributeError, IndexError):
        pass
    try:
        candidates.append(locale.getdefaultlocale()[1])
    except (AttributeError, IndexError):
        pass

    # 'latin-1' closes the list
    candidates.append('latin-1')

    for codec in candidates:
        # some of the locale calls may have returned None
        if not codec:
            continue
        try:
            return (text_type(data, codec), codec)
        except (UnicodeError, LookupError):
            continue

    raise UnicodeError(
        'Unable to decode input data. Tried the following encodings: %s.'
        % ', '.join([repr(codec) for codec in candidates if codec]))
##########################################################################
# Remove repeated elements from a list deterministically
##########################################################################
def print_formatted(self, fp=sys.stdout, force_color=False, no_color=False,
                    show_cmd=False, show_user=False, show_pid=False,
                    show_power=None, gpuname_width=16,
                    show_header=True,
                    ):
    """
    Write an optional header line and one line per entry of ``self`` to ``fp``.

    ``force_color`` and ``no_color`` choose how the ``Terminal`` used for
    styling is built; passing both raises ``ValueError``.  With
    ``show_header`` the header shows ``self.hostname`` and
    ``self.query_time`` formatted with the locale's date-and-time format.
    The name column is at least ``gpuname_width`` wide and grows to the
    longest ``entry['name']``.  The ``show_*`` flags are forwarded to each
    entry's ``print_to``.  ``fp`` is flushed at the end.
    """
    # ANSI color configuration
    if force_color and no_color:
        raise ValueError("--color and --no_color can't be used at the same time")

    if force_color:
        term_options = {'kind': 'xterm-color', 'force_styling': True}
    elif no_color:
        term_options = {'force_styling': None}
    else:
        term_options = {}  # auto, depending on isatty
    t_color = Terminal(**term_options)

    # header
    if show_header:
        time_format = locale.nl_langinfo(locale.D_T_FMT)
        hostname = self.hostname
        timestr = self.query_time.strftime(time_format)
        fp.write('{t.bold_white}{hostname}{t.normal} {timestr}'.format(
            hostname=hostname, timestr=timestr, t=t_color))
        fp.write('\n')

    # body
    name_lengths = [len(g.entry['name']) for g in self]
    gpuname_width = max([gpuname_width] + name_lengths)
    row_options = dict(
        show_cmd=show_cmd,
        show_user=show_user,
        show_pid=show_pid,
        show_power=show_power,
        gpuname_width=gpuname_width,
        term=t_color,
    )
    for g in self:
        g.print_to(fp, **row_options)
        fp.write('\n')

    fp.flush()
def guess_encoding(data):
    """
    Decode a byte string by trying a series of candidate encodings.

    'utf-8' is tried first, then whatever the locale reports
    (``nl_langinfo(CODESET)``, ``getlocale()``, ``getdefaultlocale()``),
    and 'latin-1' last.

    The calling program *must* first call::

        locale.setlocale(locale.LC_ALL, '')

    Returns ``(decoded_unicode, successful_encoding)`` for the first
    candidate that works; raises ``UnicodeError`` if none does.
    """
    # 'utf-8' always heads the list
    candidates = ['utf-8']

    # Locale-derived candidates; each probe is optional and is skipped
    # when the platform does not provide it.
    try:
        candidates.append(locale.nl_langinfo(locale.CODESET))
    except AttributeError:
        pass
    try:
        candidates.append(locale.getlocale()[1])
    except (AttributeError, IndexError):
        pass
    try:
        candidates.append(locale.getdefaultlocale()[1])
    except (AttributeError, IndexError):
        pass

    # 'latin-1' closes the list
    candidates.append('latin-1')

    for codec in candidates:
        # some of the locale calls may have returned None
        if not codec:
            continue
        try:
            return (text_type(data, codec), codec)
        except (UnicodeError, LookupError):
            continue

    raise UnicodeError(
        'Unable to decode input data. Tried the following encodings: %s.'
        % ', '.join([repr(codec) for codec in candidates if codec]))
##########################################################################
# Remove repeated elements from a list deterministically
##########################################################################
def main(use_pygame_console=0, interactmethod=default_interactmethod, print_banner=True, clear_main=True):
    """
    Run an interactive console session on the current terminal.

    use_pygame_console -- accepted but currently ignored (that branch is
        disabled with ``if 0``).
    interactmethod -- name of the ReaderConsole method that runs the loop.
    print_banner -- print the Python version banner first.
    clear_main -- when this module is not ``__main__`` itself, install a
        fresh, empty ``__main__`` module as the session namespace.

    The console encoding is taken from ``locale.nl_langinfo(CODESET)``
    when available.  ``sys.stdin``/``stdout``/``stderr`` are restored on
    exit.
    """
    # The Python 2-only ``new`` module no longer exists; ModuleType is
    # available on both Python 2 and 3.
    import types

    si, se, so = sys.stdin, sys.stderr, sys.stdout
    try:
        if 0 and use_pygame_console: # pygame currently borked
            from pyrepl.pygame_console import PyGameConsole, FakeStdin, FakeStdout
            con = PyGameConsole()
            sys.stderr = sys.stdout = FakeStdout(con)
            sys.stdin = FakeStdin(con)
        else:
            from pyrepl.unix_console import UnixConsole
            try:
                import locale
            except ImportError:
                encoding = None
            else:
                if hasattr(locale, 'nl_langinfo') \
                       and hasattr(locale, 'CODESET'):
                    encoding = locale.nl_langinfo(locale.CODESET)
                elif os.environ.get('TERM_PROGRAM') == 'Apple_Terminal':
                    # /me whistles innocently...
                    code = int(os.popen(
                        "defaults read com.apple.Terminal StringEncoding"
                        ).read())
                    if code == 4:
                        encoding = 'utf-8'
                        # More could go here -- and what's here isn't
                        # bulletproof.  What would be?  AppleScript?
                        # Doesn't seem to be possible.
                    else:
                        encoding = None
                else:
                    encoding = None # so you get ASCII...
            con = UnixConsole(os.dup(0), os.dup(1), None, encoding)
        if print_banner:
            # Single-argument print() calls produce the same output under
            # Python 2 (print statement) and Python 3 (print function).
            print("Python %s on %s" % (sys.version, sys.platform))
            print('Type "help", "copyright", "credits" or "license" '
                  'for more information.')
        sys.path.insert(0, os.getcwd())

        if clear_main and __name__ != '__main__':
            mainmod = types.ModuleType('__main__')
            sys.modules['__main__'] = mainmod
        else:
            mainmod = sys.modules['__main__']

        rc = ReaderConsole(con, mainmod.__dict__)
        rc.reader._module_list_ready = False
        rc.run_user_init_file()
        getattr(rc, interactmethod)()
    finally:
        # Always hand the original streams back, even if the session died.
        sys.stdin, sys.stderr, sys.stdout = si, se, so
def test():
    """Simple interactive test program for the file dialogs.

    Shows the load/save dialog classes and then the askopenfilename /
    asksaveasfilename helpers, printing each result.  The names from the
    helpers are printed encoded with the locale's codeset (UTF-8 when it
    cannot be determined).
    """
    root = Tk()
    root.withdraw()
    fd = LoadFileDialog(root)
    loadfile = fd.go(key="test")
    fd = SaveFileDialog(root)
    savefile = fd.go(key="test")
    print(loadfile, savefile)

    # Since the file name may contain non-ASCII characters, we need
    # to find an encoding that likely supports the file name, and
    # displays correctly on the terminal.

    # Start off with UTF-8
    enc = "utf-8"

    # See whether CODESET is defined
    try:
        import locale
        locale.setlocale(locale.LC_ALL,'')
        enc = locale.nl_langinfo(locale.CODESET)
    except (ImportError, AttributeError):
        pass

    # dialog for opening files
    openfilename=askopenfilename(filetypes=[("all files", "*")])
    try:
        # Only checks that the file can be opened; the context manager
        # guarantees the handle is closed again.
        with open(openfilename,"r"):
            pass
    except Exception as exc:
        # Report any ordinary failure, but let KeyboardInterrupt and
        # SystemExit through (a bare except would swallow them).
        print("Could not open File: ")
        print(exc)

    print("open", openfilename.encode(enc))

    # dialog for saving files
    saveasfilename=asksaveasfilename()
    print("saveas", saveasfilename.encode(enc))
def guess_encoding(data):
    """
    Decode a byte string by trying a series of candidate encodings.

    'utf-8' is tried first, then whatever the locale reports
    (``nl_langinfo(CODESET)``, ``getlocale()``, ``getdefaultlocale()``),
    and 'latin-1' last.

    The calling program *must* first call::

        locale.setlocale(locale.LC_ALL, '')

    Returns ``(decoded_unicode, successful_encoding)`` for the first
    candidate that works; raises ``UnicodeError`` if none does.
    """
    # 'utf-8' always heads the list
    candidates = ['utf-8']

    # Locale-derived candidates; each probe is optional and is skipped
    # when the platform does not provide it.
    try:
        candidates.append(locale.nl_langinfo(locale.CODESET))
    except AttributeError:
        pass
    try:
        candidates.append(locale.getlocale()[1])
    except (AttributeError, IndexError):
        pass
    try:
        candidates.append(locale.getdefaultlocale()[1])
    except (AttributeError, IndexError):
        pass

    # 'latin-1' closes the list
    candidates.append('latin-1')

    for codec in candidates:
        # some of the locale calls may have returned None
        if not codec:
            continue
        try:
            return (text_type(data, codec), codec)
        except (UnicodeError, LookupError):
            continue

    raise UnicodeError(
        'Unable to decode input data. Tried the following encodings: %s.'
        % ', '.join([repr(codec) for codec in candidates if codec]))
##########################################################################
# Remove repeated elements from a list deterministically
##########################################################################
def guess_encoding(data):
    """
    Decode a byte string by trying a series of candidate encodings.

    'utf-8' is tried first, then whatever the locale reports
    (``nl_langinfo(CODESET)``, ``getlocale()``, ``getdefaultlocale()``),
    and 'latin-1' last.

    The calling program *must* first call::

        locale.setlocale(locale.LC_ALL, '')

    Returns ``(decoded_unicode, successful_encoding)`` for the first
    candidate that works; raises ``UnicodeError`` if none does.
    """
    # 'utf-8' always heads the list
    candidates = ['utf-8']

    # Locale-derived candidates; each probe is optional and is skipped
    # when the platform does not provide it.
    try:
        candidates.append(locale.nl_langinfo(locale.CODESET))
    except AttributeError:
        pass
    try:
        candidates.append(locale.getlocale()[1])
    except (AttributeError, IndexError):
        pass
    try:
        candidates.append(locale.getdefaultlocale()[1])
    except (AttributeError, IndexError):
        pass

    # 'latin-1' closes the list
    candidates.append('latin-1')

    for codec in candidates:
        # some of the locale calls may have returned None
        if not codec:
            continue
        try:
            return (text_type(data, codec), codec)
        except (UnicodeError, LookupError):
            continue

    raise UnicodeError(
        'Unable to decode input data. Tried the following encodings: %s.'
        % ', '.join([repr(codec) for codec in candidates if codec]))
##########################################################################
# Remove repeated elements from a list deterministically
##########################################################################
def test():
    """Simple interactive test program for the file dialogs.

    Shows the load/save dialog classes and then the askopenfilename /
    asksaveasfilename helpers, printing each result.  The names from the
    helpers are printed encoded with the locale's codeset (UTF-8 when it
    cannot be determined).
    """
    root = Tk()
    root.withdraw()
    fd = LoadFileDialog(root)
    loadfile = fd.go(key="test")
    fd = SaveFileDialog(root)
    savefile = fd.go(key="test")
    print(loadfile, savefile)

    # Since the file name may contain non-ASCII characters, we need
    # to find an encoding that likely supports the file name, and
    # displays correctly on the terminal.

    # Start off with UTF-8
    enc = "utf-8"

    # See whether CODESET is defined
    try:
        import locale
        locale.setlocale(locale.LC_ALL,'')
        enc = locale.nl_langinfo(locale.CODESET)
    except (ImportError, AttributeError):
        pass

    # dialog for opening files
    openfilename=askopenfilename(filetypes=[("all files", "*")])
    try:
        # Only checks that the file can be opened; the context manager
        # guarantees the handle is closed again.
        with open(openfilename,"r"):
            pass
    except Exception as exc:
        # Report any ordinary failure, but let KeyboardInterrupt and
        # SystemExit through (a bare except would swallow them).
        print("Could not open File: ")
        print(exc)

    print("open", openfilename.encode(enc))

    # dialog for saving files
    saveasfilename=asksaveasfilename()
    print("saveas", saveasfilename.encode(enc))
def guess_encoding(data):
    """
    Decode a byte string by trying a series of candidate encodings.

    'utf-8' is tried first, then whatever the locale reports
    (``nl_langinfo(CODESET)``, ``getlocale()``, ``getdefaultlocale()``),
    and 'latin-1' last.

    The calling program *must* first call::

        locale.setlocale(locale.LC_ALL, '')

    Returns ``(decoded_unicode, successful_encoding)`` for the first
    candidate that works; raises ``UnicodeError`` if none does.
    """
    # 'utf-8' always heads the list
    candidates = ['utf-8']

    # Locale-derived candidates; each probe is optional and is skipped
    # when the platform does not provide it.
    try:
        candidates.append(locale.nl_langinfo(locale.CODESET))
    except AttributeError:
        pass
    try:
        candidates.append(locale.getlocale()[1])
    except (AttributeError, IndexError):
        pass
    try:
        candidates.append(locale.getdefaultlocale()[1])
    except (AttributeError, IndexError):
        pass

    # 'latin-1' closes the list
    candidates.append('latin-1')

    for codec in candidates:
        # some of the locale calls may have returned None
        if not codec:
            continue
        try:
            return (text_type(data, codec), codec)
        except (UnicodeError, LookupError):
            continue

    raise UnicodeError(
        'Unable to decode input data. Tried the following encodings: %s.'
        % ', '.join([repr(codec) for codec in candidates if codec]))
##########################################################################
# Remove repeated elements from a list deterministically
##########################################################################
def guess_encoding(data):
    """
    Decode a byte string by trying a series of candidate encodings.

    'utf-8' is tried first, then whatever the locale reports
    (``nl_langinfo(CODESET)``, ``getlocale()``, ``getdefaultlocale()``),
    and 'latin-1' last.

    The calling program *must* first call::

        locale.setlocale(locale.LC_ALL, '')

    Returns ``(decoded_unicode, successful_encoding)`` for the first
    candidate that works; raises ``UnicodeError`` if none does.
    """
    # 'utf-8' always heads the list
    candidates = ['utf-8']

    # Locale-derived candidates; each probe is optional and is skipped
    # when the platform does not provide it.
    try:
        candidates.append(locale.nl_langinfo(locale.CODESET))
    except AttributeError:
        pass
    try:
        candidates.append(locale.getlocale()[1])
    except (AttributeError, IndexError):
        pass
    try:
        candidates.append(locale.getdefaultlocale()[1])
    except (AttributeError, IndexError):
        pass

    # 'latin-1' closes the list
    candidates.append('latin-1')

    for codec in candidates:
        # some of the locale calls may have returned None
        if not codec:
            continue
        try:
            return (text_type(data, codec), codec)
        except (UnicodeError, LookupError):
            continue

    raise UnicodeError(
        'Unable to decode input data. Tried the following encodings: %s.'
        % ', '.join([repr(codec) for codec in candidates if codec]))
##########################################################################
# Remove repeated elements from a list deterministically
##########################################################################
def guess_encoding(data):
    """
    Decode a byte string by trying a series of candidate encodings.

    'utf-8' is tried first, then whatever the locale reports
    (``nl_langinfo(CODESET)``, ``getlocale()``, ``getdefaultlocale()``),
    and 'latin-1' last.

    The calling program *must* first call::

        locale.setlocale(locale.LC_ALL, '')

    Returns ``(decoded_unicode, successful_encoding)`` for the first
    candidate that works; raises ``UnicodeError`` if none does.
    """
    # 'utf-8' always heads the list
    candidates = ['utf-8']

    # Locale-derived candidates; each probe is optional and is skipped
    # when the platform does not provide it.
    try:
        candidates.append(locale.nl_langinfo(locale.CODESET))
    except AttributeError:
        pass
    try:
        candidates.append(locale.getlocale()[1])
    except (AttributeError, IndexError):
        pass
    try:
        candidates.append(locale.getdefaultlocale()[1])
    except (AttributeError, IndexError):
        pass

    # 'latin-1' closes the list
    candidates.append('latin-1')

    for codec in candidates:
        # some of the locale calls may have returned None
        if not codec:
            continue
        try:
            return (text_type(data, codec), codec)
        except (UnicodeError, LookupError):
            continue

    raise UnicodeError(
        'Unable to decode input data. Tried the following encodings: %s.'
        % ', '.join([repr(codec) for codec in candidates if codec]))
##########################################################################
# Remove repeated elements from a list deterministically
##########################################################################