ちょっと文字列を BASE64 にエンコードしようとしたら UNICODE 関連ではまった。
何を言ってるのかわからねーと思うが、おれも何が起きたのかわからなかった。
BASE64 エンコードしようとしたら UNICODE 関連でエラー!?
UnicodeEncodeError: 'ascii' codec can't encode characters in position x-y: ordinal not in range(128)
というわけで以下のリンクが非常に参考になった。
おかげでメールの件名を無事に BASE64 デコードできた。
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Decode Base64 (RFC 2047 "B") encoded-words found in a mail subject.

The code runs unchanged on Python 2.6+ and Python 3.
"""
import base64
import sys


def decodeSubject(source, outputEncoding='utf_8'):
    """Decode Base64 encoded-words in a subject and re-encode the result.

    ``source`` is processed line by line.  A line whose stripped form looks
    like ``=?charset?B?payload?=`` is Base64-decoded, interpreted in
    ``charset`` and re-encoded to ``outputEncoding``.  Any other line is
    appended as is (text lines are encoded to ``outputEncoding``; byte-string
    lines on Python 2 are passed through untouched).  The line breaks used to
    split ``source`` are not reproduced in the result.

    Args:
        source: The subject text, possibly spanning several lines.
        outputEncoding: Name of the codec used for the returned bytes.

    Returns:
        The decoded subject as a byte string in ``outputEncoding``
        (``str`` on Python 2, ``bytes`` on Python 3).

    Raises:
        ValueError: An encoded-word is malformed or does not use the
            Base64 ("B") encoding.
        UnicodeError: The decoded text contains characters that cannot be
            represented in ``outputEncoding``.
    """
    pieces = []
    for line in source.split('\n'):
        trimmed = line.strip()
        if trimmed.startswith('=?') and trimmed.endswith('?='):
            # An encoded-word is "=?" charset "?" encoding "?" payload "?=".
            fields = trimmed[2:-2].split('?')
            # The encoding marker is case-insensitive, so accept "b" as well.
            if len(fields) != 3 or fields[1].upper() != 'B':
                raise ValueError(
                    'Unsupported or malformed encoded-word: ' + trimmed)
            encodingName, b64Encoded = fields[0], fields[2]
            binarySequence = base64.b64decode(b64Encoded)
            text = binarySequence.decode(encodingName.lower())
        elif isinstance(line, bytes):
            # Python 2 byte string of unknown encoding: keep the raw bytes.
            pieces.append(line)
            continue
        else:
            text = line
        try:
            pieces.append(text.encode(outputEncoding))
        except UnicodeError:
            raise UnicodeError(
                'The source contains incompatible character(s) with the '
                'outputEncoding(' + outputEncoding + ').')
    # Join once instead of growing a string inside the loop.
    return b''.join(pieces)


if __name__ == '__main__':
    # Following UTF-8 subject contains Japanese and Korean, so it cannot
    # convert to Japanese local encoding e.g. Shift_JIS.
    # subject = '=?UTF-8?B?5pmu6YCa44Gr5pel5pys6Kqe44Go44GL7ZWc6rWt7Ja044Go44GL?='
    # Following JIS subject contains Japanese, so it cannot convert to
    # Korean local encoding e.g. EUC-KR.
    # subject = ' =?ISO-2022-JP?B?GyRCJCIkJCQmJCgkKiMxIzIjMyM0IzUbKEo=?= '
    # Following KS C 5601 subject contains Korean, so it cannot convert to
    # Japanese local encoding e.g. Shift_JIS.
    subject = '=?iso-2022-kr?B?GyQpQ0lCTSBFUlAgT04gREVNQU5EDkEkOjgPIA4weEAvQEcPIA4wRw8oSUJNDkdRQSQ1NQ8p?='
    # decoded = decodeSubject(subject, outputEncoding='euc-jp')
    # decoded = decodeSubject(subject, outputEncoding='shift-jis')
    # decoded = decodeSubject(subject, outputEncoding='iso-2022-jp')
    # decoded = decodeSubject(subject, outputEncoding='cp932')
    decoded = decodeSubject(subject, outputEncoding='utf-8')
    # The result is a byte string, so write it to the binary stream on
    # Python 3 (sys.stdout.buffer) and to sys.stdout itself on Python 2.
    stream = getattr(sys.stdout, 'buffer', sys.stdout)
    stream.write(b'decoded:' + decoded + b'('
                 + str(type(decoded)).encode('ascii') + b')\n')
    # b64encode emits a single line with no embedded line breaks.
    stream.write(b'result:<' + base64.b64encode(decoded) + b'>\n')
こんなかんじ。