Decoding Japanese Text

The following code was written in response to a post on comp.lang.python to show how to obtain a Unicode representation of text stored in the Shift-JIS encoding. The original text has also been Base64-encoded for delivery in an e-mail message:

   1 import base64, sys
   2 from qt import *
   3 
   4 a = \
   5 """SW4gdGhpcyBzYW1wbGUsIGUtbWFpbCB0aXRsZSBhbmQgdGV4dCBhcmUgd3JpdHRlbiBpbiBKYXBh
   6 bmVzZS4gDQpPdXIgbGFuZ3VhZ2UgaGFzIHRocmVlIHR5cGVzIGNhbGxlZCCBZ0thdGFrYW5hgWgs
   7 IIFnSGlyYWdhbmGBaCBhbmQgDQqBZ0thbmppgWguIA0KVGhpcyBlLW1haWwgY29udGFpbnMgYWxs
   8 IHRoZSB0eXBlcy4gDQqDQ4NOg1aDQYLNgmiCd4Jrgm6CYIJjglKBRIJQgk+CyYLEg4GBW4OLi0CU
   9 XILwIA0Kk/qWe4zqkc6JnoKzgrmC6YLngrWCooLFgreC5oFCIA0KgqCCooKkgqaCqIFBg0GDQ4NF
  10 g0eDSSANCoKpgquCrYKvgrGBQYNKg0yDToNQg1IgDQqCs4K1greCuYK7gUGDVINWg1iDWoNcIA0K
  11 gr2Cv4LCgsSCxoFBg16DYINjg2WDZyANCoLIgsmCyoLLgsyBQYNpg2qDa4Nsg20gDQqCzYLQgtOC
  12 1oLZgUGDboNxg3SDd4N6IA0KgtyC3YLegt+C4IFBg32DfoOAg4GDgiANCoLiguSC5oFBg4aDhoOI
  13 IA0KgueC6ILpguqC64FBg4mDioOLg4yDjSANCoLtgvCC8YFBg4+DSYOTIA0KgmCCYYJigmOCZIJl
  14 gmaCZ4JogmmCaoJrgmyCbYJugm+CcIJxgnKCc4J0gnWCdoJ3gniCeSANCoKBgoKCg4KEgoWChoKH
  15 goiCiYKKgouCjIKNgo6Cj4KQgpGCkoKTgpSClYKWgpeCmIKZgpogDQqBQoFBgWmBaoGBgXuBW4GW
  16 gY+BdYF2gUmBlIGQgZOBlYFggYSBg4GbgX6BooGggZmB9CANCpOMi56Tc49hkkqL5pHjgViW2IJS
  17 gXyCUYJUgXyCUiANCoKggqKCqIKikbmV25BWj2iDcoOLglCCVYpLIA0Kg0ODToNWg0GKlI6uie+O
  18 0CANCg=="""  # Base64 payload; the script below decodes it and reads the bytes as Shift-JIS
  19 
  20 
  21 if __name__ == "__main__":
  22 
  23     app = QApplication(sys.argv)
  24     s = base64.decodestring(a)
  25     qs = QTextCodec.codecForName("Shift-JIS").toUnicode(s)
  26     l = QLabel(qs, None)
  27     l.show()
  28     app.setMainWidget(l)
  29     sys.exit(app.exec_loop())

This script decodes the Base64 string, uses the appropriate codec to convert the contents into a Unicode representation, and shows the result in a label.

Note that you will need suitable fonts installed on your system to see the result.

Updated version that works with Python 2.6 and PyQt 4.4.4:

   1 # -*- coding: iso-8859-15 -*-
   2 import base64, sys
   3 from PyQt4 import QtGui, QtCore
   4 
   5 a = \
   6 """SW4gdGhpcyBzYW1wbGUsIGUtbWFpbCB0aXRsZSBhbmQgdGV4dCBhcmUgd3JpdHRlbiBpbiBKYXBh
   7 bmVzZS4gDQpPdXIgbGFuZ3VhZ2UgaGFzIHRocmVlIHR5cGVzIGNhbGxlZCCBZ0thdGFrYW5hgWgs
   8 IIFnSGlyYWdhbmGBaCBhbmQgDQqBZ0thbmppgWguIA0KVGhpcyBlLW1haWwgY29udGFpbnMgYWxs
   9 IHRoZSB0eXBlcy4gDQqDQ4NOg1aDQYLNgmiCd4Jrgm6CYIJjglKBRIJQgk+CyYLEg4GBW4OLi0CU
  10 XILwIA0Kk/qWe4zqkc6JnoKzgrmC6YLngrWCooLFgreC5oFCIA0KgqCCooKkgqaCqIFBg0GDQ4NF
  11 g0eDSSANCoKpgquCrYKvgrGBQYNKg0yDToNQg1IgDQqCs4K1greCuYK7gUGDVINWg1iDWoNcIA0K
  12 gr2Cv4LCgsSCxoFBg16DYINjg2WDZyANCoLIgsmCyoLLgsyBQYNpg2qDa4Nsg20gDQqCzYLQgtOC
  13 1oLZgUGDboNxg3SDd4N6IA0KgtyC3YLegt+C4IFBg32DfoOAg4GDgiANCoLiguSC5oFBg4aDhoOI
  14 IA0KgueC6ILpguqC64FBg4mDioOLg4yDjSANCoLtgvCC8YFBg4+DSYOTIA0KgmCCYYJigmOCZIJl
  15 gmaCZ4JogmmCaoJrgmyCbYJugm+CcIJxgnKCc4J0gnWCdoJ3gniCeSANCoKBgoKCg4KEgoWChoKH
  16 goiCiYKKgouCjIKNgo6Cj4KQgpGCkoKTgpSClYKWgpeCmIKZgpogDQqBQoFBgWmBaoGBgXuBW4GW
  17 gY+BdYF2gUmBlIGQgZOBlYFggYSBg4GbgX6BooGggZmB9CANCpOMi56Tc49hkkqL5pHjgViW2IJS
  18 gXyCUYJUgXyCUiANCoKggqKCqIKikbmV25BWj2iDcoOLglCCVYpLIA0Kg0ODToNWg0GKlI6uie+O
  19 0CANCg=="""  # Base64 payload; the script below decodes it and reads the bytes as Shift-JIS
  20 
  21 
  22 if __name__ == "__main__":
  23 
  24     app = QtGui.QApplication(sys.argv)
  25     s = base64.decodestring(a)
  26     qs = QtCore.QTextCodec.codecForName("Shift-JIS").toUnicode(s)
  27     l = QtGui.QLabel(qs, None)
  28     l.show()
  29     sys.exit(app.exec_())

jp.png (screenshot taken on Windows XP)

PyQtWiki: Decoding_Japanese_Text (last edited 2009-06-29 09:42:06 by Dodoecchi)