1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586 |
- ######################## BEGIN LICENSE BLOCK ########################
- # The Original Code is mozilla.org code.
- #
- # The Initial Developer of the Original Code is
- # Netscape Communications Corporation.
- # Portions created by the Initial Developer are Copyright (C) 1998
- # the Initial Developer. All Rights Reserved.
- #
- # Contributor(s):
- # Mark Pilgrim - port to Python
- #
- # This library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Lesser General Public
- # License as published by the Free Software Foundation; either
- # version 2.1 of the License, or (at your option) any later version.
- #
- # This library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Lesser General Public License for more details.
- #
- # You should have received a copy of the GNU Lesser General Public
- # License along with this library; if not, write to the Free Software
- # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- # 02110-1301 USA
- ######################### END LICENSE BLOCK #########################
- from . import constants
- from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
- ISO2022KRSMModel)
- from .charsetprober import CharSetProber
- from .codingstatemachine import CodingStateMachine
- from .compat import wrap_ord
- class EscCharSetProber(CharSetProber):
- def __init__(self):
- CharSetProber.__init__(self)
- self._mCodingSM = [
- CodingStateMachine(HZSMModel),
- CodingStateMachine(ISO2022CNSMModel),
- CodingStateMachine(ISO2022JPSMModel),
- CodingStateMachine(ISO2022KRSMModel)
- ]
- self.reset()
- def reset(self):
- CharSetProber.reset(self)
- for codingSM in self._mCodingSM:
- if not codingSM:
- continue
- codingSM.active = True
- codingSM.reset()
- self._mActiveSM = len(self._mCodingSM)
- self._mDetectedCharset = None
- def get_charset_name(self):
- return self._mDetectedCharset
- def get_confidence(self):
- if self._mDetectedCharset:
- return 0.99
- else:
- return 0.00
- def feed(self, aBuf):
- for c in aBuf:
- # PY3K: aBuf is a byte array, so c is an int, not a byte
- for codingSM in self._mCodingSM:
- if not codingSM:
- continue
- if not codingSM.active:
- continue
- codingState = codingSM.next_state(wrap_ord(c))
- if codingState == constants.eError:
- codingSM.active = False
- self._mActiveSM -= 1
- if self._mActiveSM <= 0:
- self._mState = constants.eNotMe
- return self.get_state()
- elif codingState == constants.eItsMe:
- self._mState = constants.eFoundIt
- self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8
- return self.get_state()
- return self.get_state()
|