Merge branch 'refs/heads/develop' into feature/PYPE-504_run_action_on_auto_sync

# Conflicts:
#	pype/ftrack/events/event_sync_to_avalon.py
Milan Kolar 2019-11-26 12:24:07 +01:00
commit ea3797391b
701 changed files with 861 additions and 166445 deletions


@ -1,6 +1,6 @@
MIT License
Copyright (c) 2018 pype club
Copyright (c) 2018 orbi tools s.r.o
Permission is hereby granted, free of charge, to any person obtaining a copy


@ -3,7 +3,7 @@ import sys
import argparse
import logging
import json
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction, MissingPermision
from pype.clockify import ClockifyAPI


@ -1,6 +1,6 @@
import os
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction
from pype.ftrack.lib.io_nonsingleton import DbConnector


@ -2,7 +2,7 @@ import sys
import argparse
import logging
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction


@ -3,7 +3,7 @@ import sys
import argparse
import logging
import subprocess
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction


@ -4,7 +4,7 @@ import argparse
import json
import arrow
import logging
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction, get_ca_mongoid
from pypeapp import config
from ftrack_api.exception import NoResultFoundError


@ -4,7 +4,7 @@ import logging
import argparse
import re
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction
from avalon import lib as avalonlib
from pype.ftrack.lib.io_nonsingleton import DbConnector


@ -4,7 +4,7 @@ import re
import argparse
import logging
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction
from pypeapp import config


@ -4,7 +4,7 @@ import json
import argparse
import logging
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction


@ -3,7 +3,7 @@ import sys
import logging
from bson.objectid import ObjectId
import argparse
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction
from pype.ftrack.lib.io_nonsingleton import DbConnector


@ -2,7 +2,7 @@ import os
import sys
import logging
import argparse
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction
from pype.ftrack.lib.io_nonsingleton import DbConnector


@ -4,7 +4,7 @@ import json
import logging
import subprocess
from operator import itemgetter
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction
from pypeapp import Logger, config


@ -4,7 +4,7 @@ import argparse
import logging
import json
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction


@ -2,7 +2,7 @@ import os
import sys
import argparse
import logging
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction


@ -2,12 +2,12 @@ import os
import json
from ruamel import yaml
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction
from pypeapp import config
from pype.ftrack.lib import get_avalon_attr
from pype.vendor.ftrack_api import session as fa_session
from ftrack_api import session as fa_session
class PrepareProject(BaseAction):


@ -7,7 +7,7 @@ import json
from pypeapp import Logger, config
from pype.ftrack import BaseAction
from pype.vendor import ftrack_api
import ftrack_api
from avalon import io, api
log = Logger().get_logger(__name__)


@ -0,0 +1,347 @@
import os
from operator import itemgetter
from pype.ftrack import BaseAction
class SeedDebugProject(BaseAction):
'''Create a debug project seeded with dummy assets, sequences and shots.'''
#: Action identifier.
identifier = "seed.debug.project"
#: Action label.
label = "SeedDebugProject"
#: Action description.
description = "Description"
#: priority
priority = 100
#: roles that are allowed to register this action
role_list = ["Pypeclub"]
icon = "{}/ftrack/action_icons/SeedProject.svg".format(
os.environ.get("PYPE_STATICS_SERVER", "")
)
# Asset names which will be created in `Assets` entity
assets = [
"Addax", "Alpaca", "Ant", "Antelope", "Aye", "Badger", "Bear", "Bee",
"Beetle", "Bluebird", "Bongo", "Bontebok", "Butterflie", "Caiman",
"Capuchin", "Capybara", "Cat", "Caterpillar", "Coyote", "Crocodile",
"Cuckoo", "Deer", "Dragonfly", "Duck", "Eagle", "Egret", "Elephant",
"Falcon", "Fossa", "Fox", "Gazelle", "Gecko", "Gerbil",
"GiantArmadillo", "Gibbon", "Giraffe", "Goose", "Gorilla",
"Grasshoper", "Hare", "Hawk", "Hedgehog", "Heron", "Hog",
"Hummingbird", "Hyena", "Chameleon", "Cheetah", "Iguana", "Jackal",
"Jaguar", "Kingfisher", "Kinglet", "Kite", "Komodo", "Lemur",
"Leopard", "Lion", "Lizard", "Macaw", "Malachite", "Mandrill",
"Mantis", "Marmoset", "Meadowlark", "Meerkat", "Mockingbird",
"Mongoose", "Monkey", "Nyal", "Ocelot", "Okapi", "Oribi", "Oriole",
"Otter", "Owl", "Panda", "Parrot", "Pelican", "Pig", "Porcupine",
"Reedbuck", "Rhinocero", "Sandpiper", "Servil", "Skink", "Sloth",
"Snake", "Spider", "Squirrel", "Sunbird", "Swallow", "Swift", "Tiger",
"Sylph", "Tanager", "Vulture", "Warthog", "Waterbuck", "Woodpecker",
"Zebra"
]
# Tasks which will be created for Assets
asset_tasks = [
"Modeling", "Lookdev", "Rigging"
]
# Tasks which will be created for Shots
shot_tasks = [
"Animation", "Lighting", "Compositing", "FX"
]
# Define how much sequences will be created
default_seq_count = 5
# Define how much shots will be created for each sequence
default_shots_count = 10
existing_projects = None
new_project_item = "< New Project >"
current_project_item = "< Current Project >"
def discover(self, session, entities, event):
''' Validation '''
return True
def interface(self, session, entities, event):
if event["data"].get("values", {}):
return
title = "Select Project where you want to create seed data"
items = []
item_splitter = {"type": "label", "value": "---"}
description_label = {
"type": "label",
"value": (
"WARNING: Action does NOT check if entities already exist !!!"
)
}
items.append(description_label)
all_projects = session.query("select full_name from Project").all()
self.existing_projects = [proj["full_name"] for proj in all_projects]
projects_items = [
{"label": proj, "value": proj} for proj in self.existing_projects
]
data_items = []
data_items.append({
"label": self.new_project_item,
"value": self.new_project_item
})
data_items.append({
"label": self.current_project_item,
"value": self.current_project_item
})
data_items.extend(sorted(
projects_items,
key=itemgetter("label"),
reverse=False
))
projects_item = {
"label": "Choose Project",
"type": "enumerator",
"name": "project_name",
"data": data_items,
"value": self.current_project_item
}
items.append(projects_item)
items.append(item_splitter)
items.append({
"label": "Number of assets",
"type": "number",
"name": "asset_count",
"value": len(self.assets)
})
items.append({
"label": "Number of sequences",
"type": "number",
"name": "seq_count",
"value": self.default_seq_count
})
items.append({
"label": "Number of shots",
"type": "number",
"name": "shots_count",
"value": self.default_shots_count
})
items.append(item_splitter)
note_label = {
"type": "label",
"value": (
"<p><i>NOTE: Enter project name and choose schema if you "
"chose `\"< New Project >\"`(code is optional)</i><p>"
)
}
items.append(note_label)
items.append({
"label": "Project name",
"name": "new_project_name",
"type": "text",
"value": ""
})
project_schemas = [
sch["name"] for sch in self.session.query("ProjectSchema").all()
]
schemas_item = {
"label": "Choose Schema",
"type": "enumerator",
"name": "new_schema_name",
"data": [
{"label": sch, "value": sch} for sch in project_schemas
],
"value": project_schemas[0]
}
items.append(schemas_item)
items.append({
"label": "*Project code",
"name": "new_project_code",
"type": "text",
"value": "",
"empty_text": "Optional..."
})
return {
"items": items,
"title": title
}
def launch(self, session, in_entities, event):
if "values" not in event["data"]:
return
# THIS IS THE PROJECT PART
values = event["data"]["values"]
selected_project = values["project_name"]
if selected_project == self.new_project_item:
project_name = values["new_project_name"]
if project_name in self.existing_projects:
msg = "Project \"{}\" already exist".format(project_name)
self.log.error(msg)
return {"success": False, "message": msg}
project_code = values["new_project_code"]
project_schema_name = values["new_schema_name"]
if not project_code:
project_code = project_name
project_code = project_code.lower().replace(" ", "_").strip()
_project = session.query(
"Project where name is \"{}\"".format(project_code)
).first()
if _project:
msg = "Project with code \"{}\" already exist".format(
project_code
)
self.log.error(msg)
return {"success": False, "message": msg}
project_schema = session.query(
"ProjectSchema where name is \"{}\"".format(
project_schema_name
)
).one()
# Create the project with the chosen schema.
self.log.debug((
"*** Creating Project: name <{}>, code <{}>, schema <{}>"
).format(project_name, project_code, project_schema_name))
project = session.create("Project", {
"name": project_code,
"full_name": project_name,
"project_schema": project_schema
})
session.commit()
elif selected_project == self.current_project_item:
entity = in_entities[0]
if entity.entity_type.lower() == "project":
project = entity
else:
if "project" in entity:
project = entity["project"]
else:
project = entity["parent"]["project"]
project_schema = project["project_schema"]
self.log.debug((
"*** Using Project: name <{}>, code <{}>, schema <{}>"
).format(
project["full_name"], project["name"], project_schema["name"]
))
else:
project = session.query("Project where full_name is \"{}\"".format(
selected_project
)).one()
project_schema = project["project_schema"]
self.log.debug((
"*** Using Project: name <{}>, code <{}>, schema <{}>"
).format(
project["full_name"], project["name"], project_schema["name"]
))
# THIS IS THE MAGIC PART
task_types = {}
for _type in project_schema["_task_type_schema"]["types"]:
if _type["name"] not in task_types:
task_types[_type["name"]] = _type
self.task_types = task_types
asset_count = values.get("asset_count") or len(self.assets)
seq_count = values.get("seq_count") or self.default_seq_count
shots_count = values.get("shots_count") or self.default_shots_count
self.create_assets(project, asset_count)
self.create_shots(project, seq_count, shots_count)
return True
def create_assets(self, project, asset_count):
self.log.debug("*** Creating assets:")
main_entity = self.session.create("Folder", {
"name": "Assets",
"parent": project
})
self.log.debug("- Assets")
available_assets = len(self.assets)
repetitive_times = (
int(asset_count / available_assets) +
(asset_count % available_assets > 0)
)
created_assets = 0
for _asset_name in self.assets:
if created_assets >= asset_count:
break
for asset_num in range(1, repetitive_times + 1):
if created_assets >= asset_count:
break
asset_name = "%s_%02d" % (_asset_name, asset_num)
asset = self.session.create("AssetBuild", {
"name": asset_name,
"parent": main_entity
})
created_assets += 1
self.log.debug("- Assets/{}".format(asset_name))
for task_name in self.asset_tasks:
self.session.create("Task", {
"name": task_name,
"parent": asset,
"type": self.task_types[task_name]
})
self.log.debug("- Assets/{}/{}".format(
asset_name, task_name
))
self.log.debug("*** Commiting Assets")
self.session.commit()
def create_shots(self, project, seq_count, shots_count):
self.log.debug("*** Creating shots:")
main_entity = self.session.create("Folder", {
"name": "Shots",
"parent": project
})
self.log.debug("- Shots")
for seq_num in range(1, seq_count+1):
seq_name = "sq%03d" % seq_num
seq = self.session.create("Sequence", {
"name": seq_name,
"parent": main_entity
})
self.log.debug("- Shots/{}".format(seq_name))
for shot_num in range(1, shots_count+1):
shot_name = "%ssh%04d" % (seq_name, (shot_num*10))
shot = self.session.create("Shot", {
"name": shot_name,
"parent": seq
})
self.log.debug("- Shots/{}/{}".format(seq_name, shot_name))
for task_name in self.shot_tasks:
self.session.create("Task", {
"name": task_name,
"parent": shot,
"type": self.task_types[task_name]
})
self.log.debug("- Shots/{}/{}/{}".format(
seq_name, shot_name, task_name
))
self.log.debug("*** Commiting Shots")
self.session.commit()
def register(session, plugins_presets={}):
'''Register plugin. Called when used as a plugin.'''
SeedDebugProject(session, plugins_presets).register()


@ -1,4 +1,4 @@
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction


@ -13,8 +13,8 @@ from pymongo import UpdateOne
import avalon
from pype.ftrack import BaseAction
from pype.ftrack.lib.io_nonsingleton import DbConnector
from pype.vendor import ftrack_api
from pype.vendor.ftrack_api import session as fa_session
import ftrack_api
from ftrack_api import session as fa_session
from pypeapp import Anatomy
@ -585,15 +585,19 @@ class SyncEntitiesFactory:
])
cust_attr_query = (
"select value, entity_id from CustomAttributeValue "
"select value, entity_id from ContextCustomAttributeValue "
"where entity_id in ({}) and configuration.key in ({})"
)
[values] = self.session._call([{
call_expr = [{
"action": "query",
"expression": cust_attr_query.format(
entity_ids_joined, attributes_joined
)
}])
}]
if hasattr(self.session, "_call"):
[values] = self.session._call(call_expr)
else:
[values] = self.session.call(call_expr)
for value in values["data"]:
entity_id = value["entity_id"]
@ -646,13 +650,17 @@ class SyncEntitiesFactory:
attributes_joined = ", ".join([
"\"{}\"".format(name) for name in attribute_names
])
[values] = self.session._call([{
call_expr = [{
"action": "query",
"expression": (
"select value, entity_id from CustomAttributeValue "
"select value, entity_id from ContextCustomAttributeValue "
"where entity_id in ({}) and configuration.key in ({})"
).format(entity_ids_joined, attributes_joined)
}])
}]
if hasattr(self.session, "_call"):
[values] = self.session._call(call_expr)
else:
[values] = self.session.call(call_expr)
avalon_hier = []
for value in values["data"]:


@ -6,7 +6,7 @@ import collections
import json
import re
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction
from avalon import io, inventory, schema


@ -4,7 +4,7 @@ import argparse
import logging
import json
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction


@ -3,7 +3,7 @@ import sys
import argparse
import logging
import json
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction


@ -6,7 +6,7 @@ import collections
import json
import re
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction
from avalon import io, inventory, schema
from pype.ftrack.lib.io_nonsingleton import DbConnector


@ -1,7 +1,7 @@
import os
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction
from pype.vendor.ftrack_api import session as fa_session
from ftrack_api import session as fa_session
class ActionAskWhereIRun(BaseAction):


@ -1,7 +1,7 @@
import platform
import socket
import getpass
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseAction


@ -13,8 +13,8 @@ from pymongo import UpdateOne
import avalon
from pype.ftrack import BaseAction
from pype.ftrack.lib.io_nonsingleton import DbConnector
from pype.vendor import ftrack_api
from pype.vendor.ftrack_api import session as fa_session
import ftrack_api
from ftrack_api import session as fa_session
from pypeapp import Anatomy, config
@ -585,7 +585,7 @@ class SyncEntitiesFactory:
])
cust_attr_query = (
"select value, entity_id from CustomAttributeValue "
"select value, entity_id from ContextCustomAttributeValue "
"where entity_id in ({}) and configuration.key in ({})"
)
[values] = self.session._call([{
@ -649,7 +649,7 @@ class SyncEntitiesFactory:
[values] = self.session._call([{
"action": "query",
"expression": (
"select value, entity_id from CustomAttributeValue "
"select value, entity_id from ContextCustomAttributeValue "
"where entity_id in ({}) and configuration.key in ({})"
).format(entity_ids_joined, attributes_joined)
}])


@ -1,6 +1,6 @@
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseEvent, get_ca_mongoid
from pype.ftrack.events.event_sync_to_avalon import Sync_to_Avalon
from pype.ftrack.events.event_sync_to_avalon import SyncToAvalon
class DelAvalonIdFromNew(BaseEvent):
@ -11,7 +11,7 @@ class DelAvalonIdFromNew(BaseEvent):
Priority of this event must be less than SyncToAvalon event
'''
priority = Sync_to_Avalon.priority - 1
priority = SyncToAvalon.priority - 1
def launch(self, session, event):
created = []


@ -1,4 +1,4 @@
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseEvent
import operator


@ -1,5 +1,5 @@
from pype.vendor import ftrack_api
from pype.ftrack import BaseEvent
import ftrack_api
from pype.ftrack.lib import BaseEvent
class RadioButtons(BaseEvent):
@ -37,4 +37,4 @@ class RadioButtons(BaseEvent):
def register(session, plugins_presets):
'''Register plugin. Called when used as a plugin.'''
Radio_buttons(session, plugins_presets).register()
RadioButtons(session, plugins_presets).register()


@ -3,7 +3,7 @@ import sys
from pype.ftrack.lib.io_nonsingleton import DbConnector
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseEvent, lib
from bson.objectid import ObjectId


@ -1,3 +1,4 @@
import ftrack_api
from pype.ftrack import BaseEvent, lib


@ -1,7 +1,7 @@
import os
import sys
import re
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseEvent


@ -1,4 +1,4 @@
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseEvent
@ -49,7 +49,10 @@ class ThumbnailEvents(BaseEvent):
self.log.info(msg)
session.commit()
try:
session.commit()
except Exception:
session.rollback()
def register(session, plugins_presets):


@ -1,4 +1,4 @@
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseEvent, lib
from pype.ftrack.lib.io_nonsingleton import DbConnector
from bson.objectid import ObjectId


@ -1,4 +1,4 @@
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack import BaseEvent
@ -6,7 +6,6 @@ class VersionToTaskStatus(BaseEvent):
def launch(self, session, event):
'''Propagates status from version to task when changed'''
session.commit()
# start of event procedure ----------------------------------
for entity in event['data'].get('entities', []):
@ -65,6 +64,7 @@ class VersionToTaskStatus(BaseEvent):
session.rollback()
self.log.warning('!!! [ {} ] status couldnt be set:\
[ {} ]'.format(path, e))
session.rollback()
else:
self.log.info('>>> [ {} ] updated to [ {} ]'.format(
path, task_status['name']))


@ -9,7 +9,7 @@ import atexit
import time
from urllib.parse import urlparse
from pype.vendor import ftrack_api
import ftrack_api
from pype.ftrack.lib import credentials
from pype.ftrack.ftrack_server import FtrackServer
from pype.ftrack.ftrack_server.lib import (


@ -2,7 +2,7 @@ import os
import sys
import types
import importlib
from pype.vendor import ftrack_api
import ftrack_api
import time
import logging
import inspect


@ -30,8 +30,7 @@ def main(args):
server.run_server(session)
except Exception as exc:
import traceback
traceback.print_tb(exc.__traceback__)
log.error("Event server crashed. See traceback below", exc_info=True)
finally:
log.debug("First closing socket")


@ -6,8 +6,8 @@ import signal
import threading
from ftrack_server import FtrackServer
from pype.vendor import ftrack_api
from pype.vendor.ftrack_api.event.hub import EventHub
import ftrack_api
from ftrack_api.event.hub import EventHub
from pypeapp import Logger
log = Logger().get_logger("Event Server Legacy")


@ -1,6 +1,6 @@
import os
import json
from pype.vendor import ftrack_api
import ftrack_api
import appdirs


@ -345,25 +345,44 @@ class AppAction(BaseHandler):
statuses = presets['status_update']
actual_status = entity['status']['name'].lower()
next_status_name = None
for key, value in statuses.items():
if actual_status in value or '_any_' in value:
if key != '_ignore_':
next_status_name = key
already_tested = []
ent_path = "/".join(
[ent["name"] for ent in entity['link']]
)
while True:
next_status_name = None
for key, value in statuses.items():
if key in already_tested:
continue
if actual_status in value or '_any_' in value:
if key != '_ignore_':
next_status_name = key
already_tested.append(key)
break
already_tested.append(key)
if next_status_name is None:
break
if next_status_name is not None:
try:
query = 'Status where name is "{}"'.format(
next_status_name
)
status = session.query(query).one()
entity['status'] = status
session.commit()
self.log.debug("Changing status to \"{}\" <{}>".format(
next_status_name, ent_path
))
break
except Exception:
session.rollback()
msg = (
'Status "{}" in presets wasn\'t found on Ftrack'
).format(next_status_name)
'Status "{}" in presets wasn\'t found'
' on Ftrack entity type "{}"'
).format(next_status_name, entity.entity_type)
self.log.warning(msg)
# Set origin avalon environments
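
The "status_update" preset consumed by the loop above maps a target status (the dictionary key) to the list of current statuses it may be applied from; "_any_" acts as a wildcard and the "_ignore_" key marks statuses that should never be changed. A minimal sketch of such a preset and of the basic matching step, with every status name here being hypothetical:

status_update_preset = {
    "_ignore_": ["omitted", "on hold"],   # never switch away from these
    "Ready": ["not ready"],               # "not ready" becomes "Ready"
    "In Progress": ["_any_"]              # anything else becomes "In Progress"
}

actual_status = "not ready"
next_status_name = None
for key, value in status_update_preset.items():
    if actual_status in value or "_any_" in value:
        if key != "_ignore_":
            next_status_name = key
        break
print(next_status_name)  # prints "Ready" with this hypothetical preset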


@ -1,8 +1,8 @@
import functools
import time
from pypeapp import Logger
from pype.vendor import ftrack_api
from pype.vendor.ftrack_api import session as fa_session
import ftrack_api
from ftrack_api import session as fa_session
from pype.ftrack.ftrack_server import session_processor
@ -603,3 +603,24 @@ class BaseHandler(object):
self.log.debug(
"Action \"{}\" Triggered successfully".format(action_name)
)
def trigger_event(
self, topic, event_data={}, session=None, source=None,
event=None, on_error="ignore"
):
if session is None:
session = self.session
if not source and event:
source = event.get("source")
# Create and trigger event
event = fa_session.ftrack_api.event.base.Event(
topic=topic,
data=event_data,
source=source
)
session.event_hub.publish(event, on_error=on_error)
self.log.debug((
"Publishing event: {}"
).format(str(event.__dict__)))
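
The trigger_event helper added above builds an ftrack event for a given topic and publishes it through the session's event hub, reusing the source of an incoming event when one is supplied. A hedged usage sketch from a hypothetical action subclass (the topic name and payload below are illustrative only):

from pype.ftrack import BaseAction


class NotifyAfterLaunch(BaseAction):
    # Hypothetical action used only to illustrate trigger_event.
    identifier = "notify.after.launch"
    label = "Notify After Launch"

    def discover(self, session, entities, event):
        return True

    def launch(self, session, entities, event):
        # Re-broadcast a custom topic so other listeners can react;
        # the source is copied from the incoming event when not passed.
        self.trigger_event(
            "pype.notify.launched",
            event_data={"entity_ids": [entity["id"] for entity in entities]},
            event=event
        )
        return True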


@ -4,7 +4,7 @@ import threading
import time
from Qt import QtCore, QtGui, QtWidgets
from pype.vendor import ftrack_api
import ftrack_api
from pypeapp import style
from pype.ftrack import FtrackServer, check_ftrack_url, credentials
from . import login_dialog


@ -7,8 +7,6 @@ import contextlib
import subprocess
import inspect
from .vendor import pather
from .vendor.pather.error import ParseError
import avalon.io as io
import avalon.api
@ -572,3 +570,43 @@ def get_subsets(asset_name,
"representaions": repres_out}
return output_dict
class CustomNone:
"""Created object can be used as custom None (not equal to None).
WARNING: Multiple created objects are not equal either.
Example:
>>> a = CustomNone()
>>> a == None
False
>>> b = CustomNone()
>>> a == b
False
>>> a == a
True
"""
def __init__(self):
"""Create uuid as identifier for custom None."""
import uuid
self.identifier = str(uuid.uuid4())
def __bool__(self):
"""Return False (like default None)."""
return False
def __eq__(self, other):
"""Equality is compared by identifier value."""
if type(other) == type(self):
if other.identifier == self.identifier:
return True
return False
def __str__(self):
"""Return value of identifier when converted to string."""
return self.identifier
def __repr__(self):
"""Representation of custom None."""
return "<CustomNone-{}>".format(str(self.identifier))


@ -144,8 +144,6 @@ class IntegrateFtrackApi(pyblish.api.InstancePlugin):
"version": 0,
"asset": asset_entity,
}
if task:
assetversion_data['task'] = task
assetversion_data.update(data.get("assetversion_data", {}))
@ -157,6 +155,9 @@ class IntegrateFtrackApi(pyblish.api.InstancePlugin):
# due to a ftrack_api bug where you can't add metadata on creation.
assetversion_metadata = assetversion_data.pop("metadata", {})
if task:
assetversion_data['task'] = task
# Create a new entity if none exits.
if not assetversion_entity:
assetversion_entity = session.create(


@ -3,7 +3,7 @@ import json
import re
import pyblish.api
from pype.vendor import clique
import clique
class CollectJSON(pyblish.api.ContextPlugin):


@ -4,7 +4,7 @@ import datetime
import time
import pyblish.api
from pype.vendor import clique
import clique
class ExtractJSON(pyblish.api.ContextPlugin):


@ -1,7 +1,7 @@
import os
import pyblish.api
import subprocess
from pype.vendor import clique
import clique
class ExtractQuicktimeEXR(pyblish.api.InstancePlugin):


@ -40,6 +40,15 @@ class CleanUp(pyblish.api.InstancePlugin):
active = True
def process(self, instance):
# Get the errored instances
failed = []
for result in instance.context.data["results"]:
if (result["error"] is not None and result["instance"] is not None
and result["instance"] not in failed):
failed.append(result["instance"])
assert instance not in failed, ("Result of '{}' instance "
"were not success".format(instance.data["name"]))
if [ef for ef in self.exclude_families
if instance.data["family"] in ef]:
return


@ -1,7 +1,7 @@
import os
import pyblish.api
from pype.vendor import clique
import clique
import pype.api


@ -1,7 +1,7 @@
import os
import pyblish.api
from pype.vendor import clique
import clique
import pype.api
from pypeapp import config


@ -62,7 +62,6 @@ class IntegrateAssetNew(pyblish.api.InstancePlugin):
"render",
"imagesequence",
"review",
"render",
"rendersetup",
"rig",
"plate",


@ -18,3 +18,6 @@ class CreateLook(avalon.maya.Creator):
# Whether to automatically convert the textures to .tx upon publish.
self.data["maketx"] = True
# Enable users to force a copy.
self.data["forceCopy"] = False


@ -206,6 +206,11 @@ class ExtractLook(pype.api.Extractor):
destination = self.resource_destination(
instance, source, do_maketx
)
# Force copy is specified.
if instance.data.get("forceCopy", False):
mode = COPY
if mode == COPY:
transfers.append((source, destination))
elif mode == HARDLINK:


@ -11,7 +11,7 @@ import avalon.maya
from maya import cmds, mel
import pymel.core as pm
from pype.vendor import ffmpeg
import ffmpeg
# from pype.scripts import otio_burnin
reload(ffmpeg)


@ -11,7 +11,7 @@ import pype.api
from maya import cmds
import pymel.core as pm
from pype.vendor import ffmpeg
import ffmpeg
reload(ffmpeg)
import avalon.maya


@ -59,16 +59,19 @@ class CollectNukeInstances(pyblish.api.ContextPlugin):
node.end()
family = avalon_knob_data["family"]
families = [avalon_knob_data["families"]]
if node["render"].value():
self.log.info("flagged for render")
family = "render.local"
add_family = "render.local"
# dealing with local/farm rendering
if node["render_farm"].value():
self.log.info("adding render farm family")
family = "render.farm"
instance.data['transfer'] = False
add_family = "render.farm"
instance.data["transfer"] = False
families.append(add_family)
else:
family = "render.no"
# add family into families
families.insert(0, family)
instance.data.update({
"subset": subset,
@ -76,8 +79,8 @@ class CollectNukeInstances(pyblish.api.ContextPlugin):
"label": node.name(),
"name": node.name(),
"subset": subset,
"family": avalon_knob_data["family"],
"families": [avalon_knob_data["families"], family],
"family": family,
"families": families,
"avalonKnob": avalon_knob_data,
"publish": node.knob('publish').value(),
"step": 1,


@ -1,25 +1,30 @@
import nuke
import pyblish.api
class CollectWriteLegacy(pyblish.api.ContextPlugin):
class CollectWriteLegacy(pyblish.api.InstancePlugin):
"""Collect legacy write nodes."""
order = pyblish.api.CollectorOrder
label = "Collect Write Legacy"
order = pyblish.api.CollectorOrder + 0.0101
label = "Collect Write node Legacy"
hosts = ["nuke", "nukeassist"]
def process(self, context):
def process(self, instance):
self.log.info(instance[:])
node = instance[0]
for node in nuke.allNodes():
if node.Class() != "Write":
continue
if node.Class() not in ["Group", "Write"]:
return
if "avalon" not in node.knobs().keys():
continue
family_knobs = ["ak:family", "avalon:family"]
test = [k for k in node.knobs().keys() if k in family_knobs]
self.log.info(test)
instance = context.create_instance(
node.name(), family="write.legacy"
)
instance.append(node)
if len(test) == 1:
if "render" in node[test[0]].value():
self.log.info("render")
return
instance.data.update(
{"family": "write.legacy",
"families": []}
)


@ -14,7 +14,7 @@ class IncrementScriptVersion(pyblish.api.ContextPlugin):
def process(self, context):
assert all(result["success"] for result in context.data["results"]), (
"Atomicity not held, aborting.")
"Publishing not succesfull so version is not increased.")
instances = context[:]


@ -0,0 +1,101 @@
import nuke
import pyblish.api
import pype.api
class ValidateNukeWriteKnobs(pyblish.api.ContextPlugin):
"""Ensure knobs are consistent.
Knobs to validate and their values comes from the
"nuke/knobs.json" preset, which needs this structure:
{
"family": {
"knob_name": knob_value
}
}
"""
order = pyblish.api.ValidatorOrder
label = "Knobs"
hosts = ["nuke"]
actions = [pype.api.RepairContextAction]
optional = True
def process(self, context):
# Check for preset existence.
if not context.data["presets"]["nuke"].get("knobs"):
return
invalid = self.get_invalid(context, compute=True)
if invalid:
raise RuntimeError(
"Found knobs with invalid values: {}".format(invalid)
)
@classmethod
def get_invalid(cls, context, compute=False):
invalid = context.data.get("invalid_knobs", [])
if compute:
invalid = cls.get_invalid_knobs(context)
return invalid
@classmethod
def get_invalid_knobs(cls, context):
presets = context.data["presets"]["nuke"]["knobs"]
invalid_knobs = []
for instance in context:
# Filter publisable instances.
if not instance.data["publish"]:
continue
# Filter families.
families = [instance.data["family"]]
families += instance.data.get("families", [])
families = list(set(families) & set(presets.keys()))
if not families:
continue
# Get all knobs to validate.
knobs = {}
for family in families:
for preset in presets[family]:
knobs.update({preset: presets[family][preset]})
# Get invalid knobs.
nodes = []
for node in nuke.allNodes():
nodes.append(node)
if node.Class() == "Group":
node.begin()
for i in nuke.allNodes():
nodes.append(i)
node.end()
for node in nodes:
for knob in node.knobs():
if knob in knobs.keys():
expected = knobs[knob]
if node[knob].value() != expected:
invalid_knobs.append(
{
"knob": node[knob],
"expected": expected,
"current": node[knob].value()
}
)
context.data["invalid_knobs"] = invalid_knobs
return invalid_knobs
@classmethod
def repair(cls, instance):
invalid = cls.get_invalid(instance)
for data in invalid:
if isinstance(data["expected"], unicode):
data["knob"].setValue(str(data["expected"]))
continue
data["knob"].setValue(data["expected"])


@ -4,8 +4,9 @@ import os
import nuke
from avalon import api
import re
import pyblish.api
from avalon.nuke import get_avalon_knob_data
class RepairWriteLegacyAction(pyblish.api.Action):
@ -26,24 +27,47 @@ class RepairWriteLegacyAction(pyblish.api.Action):
instances = pyblish.api.instances_by_plugin(failed, plugin)
for instance in instances:
data = toml.loads(instance[0]["avalon"].value())
if "Write" in instance[0].Class():
data = toml.loads(instance[0]["avalon"].value())
else:
data = get_avalon_knob_data(instance[0])
self.log.info(data)
data["xpos"] = instance[0].xpos()
data["ypos"] = instance[0].ypos()
data["input"] = instance[0].input(0)
data["publish"] = instance[0]["publish"].value()
data["render"] = instance[0]["render"].value()
data["render_farm"] = instance[0]["render_farm"].value()
data["review"] = instance[0]["review"].value()
nuke.delete(instance[0])
# nuke.delete(instance[0])
task = os.environ["AVALON_TASK"]
sanitized_task = re.sub('[^0-9a-zA-Z]+', '', task)
subset_name = "render{}Main".format(
sanitized_task.capitalize())
Create_name = "CreateWriteRender"
creator_plugin = None
for Creator in api.discover(api.Creator):
if Creator.__name__ != Create_name:
continue
creator_plugin = Creator
# return api.create()
creator_plugin(data["subset"], data["asset"]).process()
family = "render{}".format(os.environ["AVALON_TASK"].capitalize())
api.create(data["subset"], data["asset"], family)
node = nuke.toNode(data["subset"])
node.setXYpos(data["xpos"], data["ypos"])
node.setInput(0, data["input"])
node["publish"].setValue(data["publish"])
node["render"].setValue(data["render"])
node["render_farm"].setValue(data["render_farm"])
node["review"].setValue(data["review"])
class ValidateWriteLegacy(pyblish.api.InstancePlugin):


@ -2,7 +2,7 @@ import os
import tempfile
import pyblish.api
from pype.vendor import clique
import clique
import pype.api


@ -1,7 +1,7 @@
import time
import collections
from Qt import QtCore, QtGui, QtWidgets
from pype.vendor.pynput import mouse, keyboard
from pynput import mouse, keyboard
from pypeapp import Logger


@ -1,4 +1,3 @@
from functools import wraps
from http import HTTPStatus
from .lib import (
@ -15,9 +14,12 @@ def route(path, url_prefix="", methods=[], strict_match=False):
:type path: str
:param url_prefix: Specify prefix of path, defaults to "/".
:type url_prefix: str, list, optional
:param methods: Specify request method (GET, POST, PUT, etc.) when callback will be triggered, defaults to ["GET"]
:param methods: Specify request method (GET, POST, PUT, etc.) when
callback will be triggered, defaults to ["GET"]
:type methods: list, str, optional
:param strict_match: Decides if callback can handle both single and multiple entities (~/projects/<project_name> && ~/projects/), defaults to False.
:param strict_match: Decides if callback can handle both single and
multiple entities (~/projects/<project_name> && ~/projects/),
defaults to False.
:type strict_match: bool
`path` may include dynamic keys that will be stored to object which can
@ -36,6 +38,7 @@ def route(path, url_prefix="", methods=[], strict_match=False):
In this case request path must be "/avalon/projects" to trigger registered
callback.
"""
def decorator(callback):
RestApiFactory.register_route(
path, callback, url_prefix, methods, strict_match
@ -49,26 +52,32 @@ def register_statics(url_prefix, dir_path):
"""Decorator that register callback and all its attributes.
Callback is registered to Singleton RestApiFactory.
:param url_prefix: Specify prefix of path, defaults to "/". (Example: "/resources")
:param url_prefix: Specify prefix of path, defaults to "/".
(Example: "/resources")
:type url_prefix: str
:param dir_path: Path to file folder where statics are located.
:type dir_path: str
"""
RestApiFactory.register_statics((url_prefix, dir_path))
def abort(status_code=HTTPStatus.NOT_FOUND, message=None):
"""Should be used to stop registered callback
"""Should be used to stop registered callback.
`abort` raise AbortException that is handled with request Handler which
returns entered status and may send optional message in body.
:param status_code: Status that will be send in reply of request, defaults to 404
:param status_code: Status that will be send in reply of request,
defaults to 404
:type status_code: int
:param message: Message to send in body, default messages are based on statuc_code in Handler, defaults to None
:param message: Message to send in body, default messages are based on
statuc_code in Handler, defaults to None
:type message: str, optional
...
:raises AbortException: This exception is handled in Handler to know about launched `abort`
:raises AbortException: This exception is handled in Handler to know
about launched `abort`
"""
items = []
items.append(str(status_code))
if not message:
@ -85,9 +94,10 @@ class RestApi:
Use this class is required when it is necessary to have class for handling
requests and want to use decorators for registering callbacks.
It is possible to use decorators in another class only when object, of class
where decorators are, is registered to RestApiFactory.
It is possible to use decorators in another class only when object,
of class where decorators are, is registered to RestApiFactory.
"""
def route(path, url_prefix="", methods=[], strict_match=False):
return route(path, url_prefix, methods, strict_match)


@ -1,7 +1,5 @@
Splitter = "__splitter__"
from .exceptions import ObjAlreadyExist, AbortException
from .lib import RestMethods, CustomNone, CallbackResult, RequestInfo
from .lib import RestMethods, CallbackResult, RequestInfo, Splitter
from .factory import _RestApiFactory
RestApiFactory = _RestApiFactory()


@ -8,4 +8,5 @@ class ObjAlreadyExist(Exception):
super().__init__(message)
class AbortException(Exception): pass
class AbortException(Exception):
pass


@ -116,7 +116,8 @@ def prepare_methods(methods, callback=None):
:param methods: Contain rest api methods, when callback is called.
:type methods: str, list
:param callback: Registered callback, helps to identify where is invalid method.
:param callback: Registered callback, helps to identify where is
invalid method.
:type callback: function, method, optional
:return: Valid methods
:rtype: list
@ -138,7 +139,7 @@ def prepare_methods(methods, callback=None):
for method in methods:
found = False
_method = RestMethods.get(method)
if _method == None:
if _method is None:
invalid_methods[methods].append(callback)
continue
@ -169,6 +170,7 @@ def prepare_methods(methods, callback=None):
return _methods
def prepare_callback_info(callback):
"""Prepare data for callback handling when should be triggered."""
callback_info = inspect.getfullargspec(callback)
@ -228,7 +230,9 @@ class _RestApiFactory:
self.unprocessed_statics.index(item)
)
def register_route(self, path, callback, url_prefix, methods, strict_match):
def register_route(
self, path, callback, url_prefix, methods, strict_match
):
log.debug("Registering callback for item \"{}\"".format(
callback.__qualname__
))
@ -242,7 +246,7 @@ class _RestApiFactory:
self.unprocessed_routes.append(route)
def register_obj(self, obj):
"""Register object for decorated methods in class definition"""
"""Register object for decorated methods in class definition."""
self.registered_objs.append(obj)
def register_statics(self, item):
@ -255,7 +259,8 @@ class _RestApiFactory:
Registration info are prepared to easy filter during handling
of requests.
:param route: Contain all necessary info for filtering and handling callback for registered route.
:param route: Contain all necessary info for filtering and
handling callback for registered route.
:type route: dict
"""
callback = route["callback"]
@ -278,7 +283,7 @@ class _RestApiFactory:
})
def prepare_registered(self):
"""Iterate through all registered callbacks and statics and prepare them
"""Iter through all registered callbacks and statics to prepare them.
First are processed callbacks registered with decorators in classes by
registered objects. Remaining callbacks are filtered, it is checked if
@ -314,7 +319,9 @@ class _RestApiFactory:
if not (
callback.__qualname__ == method.__qualname__ and
callback.__module__ == method.__module__ and
callback.__globals__["__file__"] == method.__globals__["__file__"]
callback.__globals__["__file__"] == (
method.__globals__["__file__"]
)
):
continue


@ -1,6 +1,7 @@
import os
import re
import json
import datetime
import traceback
import http.server
from http import HTTPStatus
@ -32,6 +33,7 @@ class Handler(http.server.SimpleHTTPRequestHandler):
"NO_CONTENT": 204
}
}
def do_GET(self):
return self._handle_request(RestMethods.GET)
@ -143,7 +145,6 @@ class Handler(http.server.SimpleHTTPRequestHandler):
except Exception as exc:
log_message = "Unexpected Exception was raised (this is bug!)"
log.error(log_message, exc_info=True)
replace_helper = 0
items = [log_message]
items += traceback.extract_tb(exc.__traceback__).format()
message = "\n".join(items)
@ -156,7 +157,6 @@ class Handler(http.server.SimpleHTTPRequestHandler):
self.wfile.write(message.encode())
return message
def _handle_callback_result(self, result, rest_method):
"""Send response to request based on result of callback.
:param result: Result returned by callback.
@ -232,7 +232,7 @@ class Handler(http.server.SimpleHTTPRequestHandler):
:param item: Item stored during callback registration with all info.
:type item: dict
:param parsed_url: Url parsed with urllib (separated path, query, etc.).
:param parsed_url: Url parsed with urllib (separated path, query, etc).
:type parsed_url: ParseResult
:param rest_method: Rest api method (GET, POST, etc.).
:type rest_method: RestMethods
@ -284,7 +284,7 @@ class Handler(http.server.SimpleHTTPRequestHandler):
return callback(*args, **kwargs)
def _handle_statics(self, dirpath, path):
"""Return static file in response when file exist in registered destination."""
"""Return static file in response when file exist in destination."""
path = os.path.normpath(dirpath + path)
ctype = self.guess_type(path)
@ -327,12 +327,17 @@ class Handler(http.server.SimpleHTTPRequestHandler):
self.send_response(HTTPStatus.OK)
self.send_header("Content-type", ctype)
self.send_header("Content-Length", str(file_stat[6]))
self.send_header("Last-Modified",
self.date_time_string(file_stat.st_mtime))
self.send_header(
"Last-Modified",
self.date_time_string(file_stat.st_mtime)
)
self.end_headers()
self.wfile.write(file_obj.read())
return file_obj
except:
self.log.error("Failed to read data from file \"{}\"".format(path))
except Exception:
log.error(
"Failed to read data from file \"{}\"".format(path),
exc_info=True
)
finally:
file_obj.close()


@ -1,12 +1,8 @@
import os
import re
import enum
from http import HTTPStatus
from urllib.parse import urlencode, parse_qs
from pypeapp import Logger
log = Logger().get_logger("RestApiServer")
Splitter = "__splitter__"
class RestMethods(enum.Enum):
@ -35,27 +31,6 @@ class RestMethods(enum.Enum):
return default
class CustomNone:
"""Created object can be used as custom None (not equal to None)"""
def __init__(self, name):
self._name = name
def __bool__(self):
return False
def __eq__(self, other):
if type(other) == type(self):
if other._name == self._name:
return True
return False
def __str__(self):
return self._name
def __repr__(self):
return self._name
class HandlerDict(dict):
def __init__(self, data=None, *args, **kwargs):
if not data:
@ -65,12 +40,22 @@ class HandlerDict(dict):
def __repr__(self):
return "<{}> {}".format(self.__class__.__name__, str(dict(self)))
class Params(HandlerDict): pass
class UrlData(HandlerDict): pass
class RequestData(HandlerDict): pass
class Params(HandlerDict):
pass
class UrlData(HandlerDict):
pass
class RequestData(HandlerDict):
pass
class Query(HandlerDict):
"""Class for url query convert to dict and string"""
"""Class for url query convert to dict and string."""
def __init__(self, query):
if isinstance(query, dict):
pass
@ -81,8 +66,10 @@ class Query(HandlerDict):
def get_string(self):
return urlencode(dict(self), doseq=True)
class Fragment(HandlerDict):
"""Class for url fragment convert to dict and string"""
"""Class for url fragment convert to dict and string."""
def __init__(self, fragment):
if isinstance(fragment, dict):
_fragment = fragment
@ -112,13 +99,15 @@ class Fragment(HandlerDict):
)
return "&".join(items)
class RequestInfo:
"""Object that can be passed to callback as argument.
Contain necessary data for handling request.
Object is created to single use and can be used similar to dict.
:param url_data: Data collected from path when path with dynamic keys is matching.
:param url_data: Data collected from path when path with dynamic keys
is matching.
:type url_data: dict, None
:param request_data: Data of body from request.
:type request_data: dict, None
@ -166,14 +155,15 @@ class CallbackResult:
"""Can be used as return value of callback.
It is possible to specify status code, success boolean, message and data
for specify head and body of request response. `abort` should be rather used
when result is error.
for specify head and body of request response. `abort` should be rather
used when result is error.
:param status_code: Status code of result.
:type status_code: int
:param success: Success is key in body, may be used for handling response.
:type success: bool
:param message: Similar to success, message is key in body and may be used for handling response.
:param message: Similar to success, message is key in body and may
be used for handling response.
:type message: str, None
:param data: Data is also key for body in response.
:type data: dict, None


@ -1,7 +1,4 @@
import os
import re
import collections
import threading
import socket
import socketserver
from Qt import QtCore


@ -6,7 +6,7 @@ import json
from . import QtWidgets, QtCore, QtGui
from . import HelpRole, FamilyRole, ExistsRole, PluginRole
from . import qtawesome
from pype.vendor import six
import six
from pype import lib as pypelib

pype/vendor/appdirs.py (vendored), 608 deletions

@ -1,608 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2005-2010 ActiveState Software Inc.
# Copyright (c) 2013 Eddy Petrișor
"""Utilities for determining application-specific dirs.
See <http://github.com/ActiveState/appdirs> for details and usage.
"""
# Dev Notes:
# - MSDN on where to store app data files:
# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
__version_info__ = (1, 4, 3)
__version__ = '.'.join(map(str, __version_info__))
import sys
import os
PY3 = sys.version_info[0] == 3
if PY3:
unicode = str
if sys.platform.startswith('java'):
import platform
os_name = platform.java_ver()[3][0]
if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc.
system = 'win32'
elif os_name.startswith('Mac'): # "Mac OS X", etc.
system = 'darwin'
else: # "Linux", "SunOS", "FreeBSD", etc.
# Setting this to "linux2" is not ideal, but only Windows or Mac
# are actually checked for and the rest of the module expects
# *sys.platform* style strings.
system = 'linux2'
else:
system = sys.platform
def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
r"""Return full path to the user-specific data dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"roaming" (boolean, default False) can be set True to use the Windows
roaming appdata directory. That means that for users on a Windows
network setup for roaming profiles, this user data will be
sync'd on login. See
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
for a discussion of issues.
Typical user data directories are:
Mac OS X: ~/Library/Application Support/<AppName>
Unix: ~/.local/share/<AppName> # or in $XDG_DATA_HOME, if defined
Win XP (not roaming): C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
Win XP (roaming): C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
Win 7 (not roaming): C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
Win 7 (roaming): C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>
For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
That means, by default "~/.local/share/<AppName>".
"""
if system == "win32":
if appauthor is None:
appauthor = appname
const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
path = os.path.normpath(_get_win_folder(const))
if appname:
if appauthor is not False:
path = os.path.join(path, appauthor, appname)
else:
path = os.path.join(path, appname)
elif system == 'darwin':
path = os.path.expanduser('~/Library/Application Support/')
if appname:
path = os.path.join(path, appname)
else:
path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share"))
if appname:
path = os.path.join(path, appname)
if appname and version:
path = os.path.join(path, version)
return path
def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
r"""Return full path to the user-shared data dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"multipath" is an optional parameter only applicable to *nix
which indicates that the entire list of data dirs should be
returned. By default, the first item from XDG_DATA_DIRS is
returned, or '/usr/local/share/<AppName>',
if XDG_DATA_DIRS is not set
Typical site data directories are:
Mac OS X: /Library/Application Support/<AppName>
Unix: /usr/local/share/<AppName> or /usr/share/<AppName>
Win XP: C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
Win 7: C:\ProgramData\<AppAuthor>\<AppName> # Hidden, but writeable on Win 7.
For Unix, this is using the $XDG_DATA_DIRS[0] default.
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
"""
if system == "win32":
if appauthor is None:
appauthor = appname
path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
if appname:
if appauthor is not False:
path = os.path.join(path, appauthor, appname)
else:
path = os.path.join(path, appname)
elif system == 'darwin':
path = os.path.expanduser('/Library/Application Support')
if appname:
path = os.path.join(path, appname)
else:
# XDG default for $XDG_DATA_DIRS
# only first, if multipath is False
path = os.getenv('XDG_DATA_DIRS',
os.pathsep.join(['/usr/local/share', '/usr/share']))
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
if appname:
if version:
appname = os.path.join(appname, version)
pathlist = [os.sep.join([x, appname]) for x in pathlist]
if multipath:
path = os.pathsep.join(pathlist)
else:
path = pathlist[0]
return path
if appname and version:
path = os.path.join(path, version)
return path
def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
r"""Return full path to the user-specific config dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"roaming" (boolean, default False) can be set True to use the Windows
roaming appdata directory. That means that for users on a Windows
network setup for roaming profiles, this user data will be
sync'd on login. See
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
for a discussion of issues.
Typical user config directories are:
Mac OS X: same as user_data_dir
Unix: ~/.config/<AppName> # or in $XDG_CONFIG_HOME, if defined
Win *: same as user_data_dir
For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
That means, by default "~/.config/<AppName>".
"""
if system in ["win32", "darwin"]:
path = user_data_dir(appname, appauthor, None, roaming)
else:
path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config"))
if appname:
path = os.path.join(path, appname)
if appname and version:
path = os.path.join(path, version)
return path
def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
r"""Return full path to the user-shared data dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"multipath" is an optional parameter only applicable to *nix
which indicates that the entire list of config dirs should be
returned. By default, the first item from XDG_CONFIG_DIRS is
returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set
Typical site config directories are:
Mac OS X: same as site_data_dir
Unix: /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
$XDG_CONFIG_DIRS
Win *: same as site_data_dir
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
"""
if system in ["win32", "darwin"]:
path = site_data_dir(appname, appauthor)
if appname and version:
path = os.path.join(path, version)
else:
# XDG default for $XDG_CONFIG_DIRS
# only first, if multipath is False
path = os.getenv('XDG_CONFIG_DIRS', '/etc/xdg')
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
if appname:
if version:
appname = os.path.join(appname, version)
pathlist = [os.sep.join([x, appname]) for x in pathlist]
if multipath:
path = os.pathsep.join(pathlist)
else:
path = pathlist[0]
return path
def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
r"""Return full path to the user-specific cache dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"opinion" (boolean) can be False to disable the appending of
"Cache" to the base app data dir for Windows. See
discussion below.
Typical user cache directories are:
Mac OS X: ~/Library/Caches/<AppName>
Unix: ~/.cache/<AppName> (XDG default)
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
On Windows the only suggestion in the MSDN docs is that local settings go in
the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
app data dir (the default returned by `user_data_dir` above). Apps typically
put cache data somewhere *under* the given dir here. Some examples:
...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
...\Acme\SuperApp\Cache\1.0
OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
This can be disabled with the `opinion=False` option.
"""
if system == "win32":
if appauthor is None:
appauthor = appname
path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
if appname:
if appauthor is not False:
path = os.path.join(path, appauthor, appname)
else:
path = os.path.join(path, appname)
if opinion:
path = os.path.join(path, "Cache")
elif system == 'darwin':
path = os.path.expanduser('~/Library/Caches')
if appname:
path = os.path.join(path, appname)
else:
path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
if appname:
path = os.path.join(path, appname)
if appname and version:
path = os.path.join(path, version)
return path
def user_state_dir(appname=None, appauthor=None, version=None, roaming=False):
r"""Return full path to the user-specific state dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"roaming" (boolean, default False) can be set True to use the Windows
roaming appdata directory. That means that for users on a Windows
network setup for roaming profiles, this user data will be
sync'd on login. See
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
for a discussion of issues.
Typical user state directories are:
Mac OS X: same as user_data_dir
Unix: ~/.local/state/<AppName> # or in $XDG_STATE_HOME, if defined
Win *: same as user_data_dir
For Unix, we follow this Debian proposal <https://wiki.debian.org/XDGBaseDirectorySpecification#state>
to extend the XDG spec and support $XDG_STATE_HOME.
That means, by default "~/.local/state/<AppName>".
"""
if system in ["win32", "darwin"]:
path = user_data_dir(appname, appauthor, None, roaming)
else:
path = os.getenv('XDG_STATE_HOME', os.path.expanduser("~/.local/state"))
if appname:
path = os.path.join(path, appname)
if appname and version:
path = os.path.join(path, version)
return path
def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
r"""Return full path to the user-specific log dir for this application.
"appname" is the name of application.
If None, just the system directory is returned.
"appauthor" (only used on Windows) is the name of the
appauthor or distributing body for this application. Typically
it is the owning company name. This falls back to appname. You may
pass False to disable it.
"version" is an optional version path element to append to the
path. You might want to use this if you want multiple versions
of your app to be able to run independently. If used, this
would typically be "<major>.<minor>".
Only applied when appname is present.
"opinion" (boolean) can be False to disable the appending of
"Logs" to the base app data dir for Windows, and "log" to the
base cache dir for Unix. See discussion below.
Typical user log directories are:
Mac OS X: ~/Library/Logs/<AppName>
Unix: ~/.cache/<AppName>/log # or under $XDG_CACHE_HOME if defined
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs
On Windows the only suggestion in the MSDN docs is that local settings
go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
examples of what some windows apps use for a logs dir.)
OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
value for Windows and appends "log" to the user cache dir for Unix.
This can be disabled with the `opinion=False` option.
"""
if system == "darwin":
path = os.path.join(
os.path.expanduser('~/Library/Logs'),
appname)
elif system == "win32":
path = user_data_dir(appname, appauthor, version)
version = False
if opinion:
path = os.path.join(path, "Logs")
else:
path = user_cache_dir(appname, appauthor, version)
version = False
if opinion:
path = os.path.join(path, "log")
if appname and version:
path = os.path.join(path, version)
return path
class AppDirs(object):
"""Convenience wrapper for getting application dirs."""
def __init__(self, appname=None, appauthor=None, version=None,
roaming=False, multipath=False):
self.appname = appname
self.appauthor = appauthor
self.version = version
self.roaming = roaming
self.multipath = multipath
@property
def user_data_dir(self):
return user_data_dir(self.appname, self.appauthor,
version=self.version, roaming=self.roaming)
@property
def site_data_dir(self):
return site_data_dir(self.appname, self.appauthor,
version=self.version, multipath=self.multipath)
@property
def user_config_dir(self):
return user_config_dir(self.appname, self.appauthor,
version=self.version, roaming=self.roaming)
@property
def site_config_dir(self):
return site_config_dir(self.appname, self.appauthor,
version=self.version, multipath=self.multipath)
@property
def user_cache_dir(self):
return user_cache_dir(self.appname, self.appauthor,
version=self.version)
@property
def user_state_dir(self):
return user_state_dir(self.appname, self.appauthor,
version=self.version)
@property
def user_log_dir(self):
return user_log_dir(self.appname, self.appauthor,
version=self.version)
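For reference, a minimal usage sketch of the wrapper above (illustrative only; it mirrors the self-test further down, with placeholder names), showing the property-based access on a Linux host:
# Hypothetical sketch; "MyApp"/"MyCompany" are placeholder names.
from appdirs import AppDirs

dirs = AppDirs("MyApp", "MyCompany", version="1.0")
print(dirs.user_data_dir)   # e.g. ~/.local/share/MyApp/1.0 on Linux
print(dirs.user_cache_dir)  # e.g. ~/.cache/MyApp/1.0 on Linux
print(dirs.user_log_dir)    # e.g. ~/.cache/MyApp/1.0/log on Linux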
#---- internal support stuff
def _get_win_folder_from_registry(csidl_name):
"""This is a fallback technique at best. I'm not sure if using the
registry for this guarantees us the correct answer for all CSIDL_*
names.
"""
if PY3:
import winreg as _winreg
else:
import _winreg
shell_folder_name = {
"CSIDL_APPDATA": "AppData",
"CSIDL_COMMON_APPDATA": "Common AppData",
"CSIDL_LOCAL_APPDATA": "Local AppData",
}[csidl_name]
key = _winreg.OpenKey(
_winreg.HKEY_CURRENT_USER,
r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
)
dir, type = _winreg.QueryValueEx(key, shell_folder_name)
return dir
def _get_win_folder_with_pywin32(csidl_name):
from win32com.shell import shellcon, shell
dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
# Try to make this a unicode path because SHGetFolderPath does
# not return unicode strings when there is unicode data in the
# path.
try:
dir = unicode(dir)
# Downgrade to short path name if have highbit chars. See
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
has_high_char = False
for c in dir:
if ord(c) > 255:
has_high_char = True
break
if has_high_char:
try:
import win32api
dir = win32api.GetShortPathName(dir)
except ImportError:
pass
except UnicodeError:
pass
return dir
def _get_win_folder_with_ctypes(csidl_name):
import ctypes
csidl_const = {
"CSIDL_APPDATA": 26,
"CSIDL_COMMON_APPDATA": 35,
"CSIDL_LOCAL_APPDATA": 28,
}[csidl_name]
buf = ctypes.create_unicode_buffer(1024)
ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
# Downgrade to short path name if have highbit chars. See
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
has_high_char = False
for c in buf:
if ord(c) > 255:
has_high_char = True
break
if has_high_char:
buf2 = ctypes.create_unicode_buffer(1024)
if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
buf = buf2
return buf.value
def _get_win_folder_with_jna(csidl_name):
import array
from com.sun import jna
from com.sun.jna.platform import win32
buf_size = win32.WinDef.MAX_PATH * 2
buf = array.zeros('c', buf_size)
shell = win32.Shell32.INSTANCE
shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
# Downgrade to short path name if have highbit chars. See
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
has_high_char = False
for c in dir:
if ord(c) > 255:
has_high_char = True
break
if has_high_char:
buf = array.zeros('c', buf_size)
kernel = win32.Kernel32.INSTANCE
if kernel.GetShortPathName(dir, buf, buf_size):
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
return dir
if system == "win32":
try:
import win32com.shell
_get_win_folder = _get_win_folder_with_pywin32
except ImportError:
try:
from ctypes import windll
_get_win_folder = _get_win_folder_with_ctypes
except ImportError:
try:
import com.sun.jna
_get_win_folder = _get_win_folder_with_jna
except ImportError:
_get_win_folder = _get_win_folder_from_registry
#---- self test code
if __name__ == "__main__":
appname = "MyApp"
appauthor = "MyCompany"
props = ("user_data_dir",
"user_config_dir",
"user_cache_dir",
"user_state_dir",
"user_log_dir",
"site_data_dir",
"site_config_dir")
print("-- app dirs %s --" % __version__)
print("-- app dirs (with optional 'version')")
dirs = AppDirs(appname, appauthor, version="1.0")
for prop in props:
print("%s: %s" % (prop, getattr(dirs, prop)))
print("\n-- app dirs (without optional 'version')")
dirs = AppDirs(appname, appauthor)
for prop in props:
print("%s: %s" % (prop, getattr(dirs, prop)))
print("\n-- app dirs (without optional 'appauthor')")
dirs = AppDirs(appname)
for prop in props:
print("%s: %s" % (prop, getattr(dirs, prop)))
print("\n-- app dirs (with disabled 'appauthor')")
dirs = AppDirs(appname, appauthor=False)
for prop in props:
print("%s: %s" % (prop, getattr(dirs, prop)))

View file

@@ -1 +0,0 @@
pip

View file

@@ -1,13 +0,0 @@
Copyright 2013 Chris Smith
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View file

@@ -1,125 +0,0 @@
Metadata-Version: 2.1
Name: arrow
Version: 0.12.1
Summary: Better dates and times for Python
Home-page: https://github.com/crsmithdev/arrow/
Author: Chris Smith
Author-email: crsmithdev@gmail.com
License: Apache 2.0
Platform: UNKNOWN
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Dist: python-dateutil
Requires-Dist: backports.functools-lru-cache (>=1.2.1); python_version=='2.7'
Arrow - Better dates & times for Python
=======================================
.. image:: https://travis-ci.org/crsmithdev/arrow.svg
:alt: build status
:target: https://travis-ci.org/crsmithdev/arrow
.. image:: https://codecov.io/github/crsmithdev/arrow/coverage.svg?branch=master
:target: https://codecov.io/github/crsmithdev/arrow
:alt: Codecov
.. image:: https://img.shields.io/pypi/v/arrow.svg
:target: https://pypi.python.org/pypi/arrow
:alt: downloads
Documentation: `arrow.readthedocs.org <http://arrow.readthedocs.org/en/latest/>`_
---------------------------------------------------------------------------------
What?
-----
Arrow is a Python library that offers a sensible, human-friendly approach to creating, manipulating, formatting and converting dates, times, and timestamps. It implements and updates the datetime type, plugging gaps in functionality, and provides an intelligent module API that supports many common creation scenarios. Simply put, it helps you work with dates and times with fewer imports and a lot less code.
Arrow is heavily inspired by `moment.js <https://github.com/timrwood/moment>`_ and `requests <https://github.com/kennethreitz/requests>`_
Why?
----
Python's standard library and some other low-level modules have near-complete date, time and time zone functionality but don't work very well from a usability perspective:
- Too many modules: datetime, time, calendar, dateutil, pytz and more
- Too many types: date, time, datetime, tzinfo, timedelta, relativedelta, etc.
- Time zones and timestamp conversions are verbose and unpleasant
- Time zone naivety is the norm
- Gaps in functionality: ISO-8601 parsing, timespans, humanization
Features
--------
- Fully implemented, drop-in replacement for datetime
- Supports Python 2.6, 2.7, 3.3, 3.4 and 3.5
- Time zone-aware & UTC by default
- Provides super-simple creation options for many common input scenarios
- Updated .replace method with support for relative offsets, including weeks
- Formats and parses strings automatically
- Partial support for ISO-8601
- Timezone conversion
- Timestamp available as a property
- Generates time spans, ranges, floors and ceilings in timeframes from year to microsecond
- Humanizes and supports a growing list of contributed locales
- Extensible for your own Arrow-derived types
Quick start
-----------
First:
.. code-block:: console
$ pip install arrow
And then:
.. code-block:: pycon
>>> import arrow
>>> utc = arrow.utcnow()
>>> utc
<Arrow [2013-05-11T21:23:58.970460+00:00]>
>>> utc = utc.replace(hours=-1)
>>> utc
<Arrow [2013-05-11T20:23:58.970460+00:00]>
>>> local = utc.to('US/Pacific')
>>> local
<Arrow [2013-05-11T13:23:58.970460-07:00]>
>>> arrow.get('2013-05-11T21:23:58.970460+00:00')
<Arrow [2013-05-11T21:23:58.970460+00:00]>
>>> local.timestamp
1368303838
>>> local.format()
'2013-05-11 13:23:58 -07:00'
>>> local.format('YYYY-MM-DD HH:mm:ss ZZ')
'2013-05-11 13:23:58 -07:00'
>>> local.humanize()
'an hour ago'
>>> local.humanize(locale='ko_kr')
'1시간 전'
Further documentation can be found at `arrow.readthedocs.org <http://arrow.readthedocs.org/en/latest/>`_
Contributing
------------
Contributions are welcome, especially with localization. See `locales.py <https://github.com/crsmithdev/arrow/blob/master/arrow/locales.py>`_ for what's currently supported.

View file

@@ -1,22 +0,0 @@
arrow-0.12.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
arrow-0.12.1.dist-info/LICENSE,sha256=pLdgG-UFacLJapgY_ICbAUlBDITJlxTWDJ1PsK6GH6I,579
arrow-0.12.1.dist-info/METADATA,sha256=PVodUfwAHQ-YjlzNkcrgnJIWud0vTQpCa_Hd-3zBOG0,4284
arrow-0.12.1.dist-info/RECORD,,
arrow-0.12.1.dist-info/WHEEL,sha256=1Cc6AMC7xwtmRYeNiHurrGRLJoEuor81IEEqyHblQuw,116
arrow-0.12.1.dist-info/top_level.txt,sha256=aCBThK2RIB824ctI3l9i6z94l8UYpFF-BC4m3dDzFFo,6
arrow/__init__.py,sha256=_q2QJezQr_fCP0jE4ldqzWJwS_yw9unClcjUOdmMOn0,164
arrow/__pycache__/__init__.cpython-36.pyc,,
arrow/__pycache__/api.cpython-36.pyc,,
arrow/__pycache__/arrow.cpython-36.pyc,,
arrow/__pycache__/factory.cpython-36.pyc,,
arrow/__pycache__/formatter.cpython-36.pyc,,
arrow/__pycache__/locales.cpython-36.pyc,,
arrow/__pycache__/parser.cpython-36.pyc,,
arrow/__pycache__/util.cpython-36.pyc,,
arrow/api.py,sha256=C6fxwcwAdd_KKhh7gaQcK_vSP4CRNNnuCUGGSn_8DS4,1066
arrow/arrow.py,sha256=CE7oSKXj9aEYnw7qw1hKA3X92ztiYnAin-QtfuSkROU,36025
arrow/factory.py,sha256=C7Ef9NQ5-hkrTECMM_NI-UIpBuMF5m546IkfZXIzfP4,8569
arrow/formatter.py,sha256=R1b-l3ejp8j8R71tF_fb19BFiyEY4VWrsGyX13g-o1Y,3434
arrow/locales.py,sha256=3iq7JEv8swN7rHrbsyHm2Jp0YMV6Ed88fa6JIiOOgVk,72239
arrow/parser.py,sha256=m_jSHIVZ0MH_pbhwjny6M2_4QpquY9V0gfDK0c6qYMM,11141
arrow/util.py,sha256=jO7c3wY9eN_58hy0KDN9caGi7F5Aip1M3Vc3PQlUixc,1034

View file

@@ -1,6 +0,0 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.32.0)
Root-Is-Purelib: true
Tag: py2-none-any
Tag: py3-none-any

View file

@@ -1 +0,0 @@
arrow

View file

@@ -1,8 +0,0 @@
# -*- coding: utf-8 -*-
from .arrow import Arrow
from .factory import ArrowFactory
from .api import get, now, utcnow
__version__ = '0.12.1'
VERSION = __version__

View file

@@ -1,52 +0,0 @@
# -*- coding: utf-8 -*-
'''
Provides the default implementation of :class:`ArrowFactory <arrow.factory.ArrowFactory>`
methods for use as a module API.
'''
from __future__ import absolute_import
from arrow.factory import ArrowFactory
# internal default factory.
_factory = ArrowFactory()
def get(*args, **kwargs):
''' Calls the default :class:`ArrowFactory <arrow.factory.ArrowFactory>` ``get`` method.
'''
return _factory.get(*args, **kwargs)
def utcnow():
''' Calls the default :class:`ArrowFactory <arrow.factory.ArrowFactory>` ``utcnow`` method.
'''
return _factory.utcnow()
def now(tz=None):
''' Calls the default :class:`ArrowFactory <arrow.factory.ArrowFactory>` ``now`` method.
'''
return _factory.now(tz)
def factory(type):
''' Returns an :class:`.ArrowFactory` for the specified :class:`Arrow <arrow.arrow.Arrow>`
or derived type.
:param type: the type, :class:`Arrow <arrow.arrow.Arrow>` or derived.
'''
return ArrowFactory(type)
__all__ = ['get', 'utcnow', 'now', 'factory']
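The `factory` helper above has no usage example in its docstring; a minimal sketch could look like the following (illustrative only: `CustomArrow` is a hypothetical subclass, and `factory` is imported from `arrow.api` because the package `__init__` above only re-exports `get`, `now` and `utcnow`).
# Hypothetical sketch of an Arrow-derived type routed through the factory helper.
from arrow.arrow import Arrow
from arrow.api import factory

class CustomArrow(Arrow):
    def is_weekend(self):
        # isoweekday(): Monday == 1 ... Sunday == 7
        return self.isoweekday() in (6, 7)

custom_factory = factory(CustomArrow)
now = custom_factory.utcnow()   # a CustomArrow instance
print(now.is_weekend())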

File diff suppressed because it is too large Load diff

View file

@@ -1,258 +0,0 @@
# -*- coding: utf-8 -*-
"""
Implements the :class:`ArrowFactory <arrow.factory.ArrowFactory>` class,
providing factory methods for common :class:`Arrow <arrow.arrow.Arrow>`
construction scenarios.
"""
from __future__ import absolute_import
from arrow.arrow import Arrow
from arrow import parser
from arrow.util import is_timestamp, isstr
from datetime import datetime, tzinfo, date
from dateutil import tz as dateutil_tz
from time import struct_time
import calendar
class ArrowFactory(object):
''' A factory for generating :class:`Arrow <arrow.arrow.Arrow>` objects.
:param type: (optional) the :class:`Arrow <arrow.arrow.Arrow>`-based class to construct from.
Defaults to :class:`Arrow <arrow.arrow.Arrow>`.
'''
def __init__(self, type=Arrow):
self.type = type
def get(self, *args, **kwargs):
''' Returns an :class:`Arrow <arrow.arrow.Arrow>` object based on flexible inputs.
:param locale: (optional) a ``str`` specifying a locale for the parser. Defaults to
'en_us'.
:param tzinfo: (optional) a :ref:`timezone expression <tz-expr>` or tzinfo object.
Replaces the timezone unless using an input form that is explicitly UTC or specifies
the timezone in a positional argument. Defaults to UTC.
Usage::
>>> import arrow
**No inputs** to get current UTC time::
>>> arrow.get()
<Arrow [2013-05-08T05:51:43.316458+00:00]>
**None** to also get current UTC time::
>>> arrow.get(None)
<Arrow [2013-05-08T05:51:49.016458+00:00]>
**One** :class:`Arrow <arrow.arrow.Arrow>` object, to get a copy.
>>> arw = arrow.utcnow()
>>> arrow.get(arw)
<Arrow [2013-10-23T15:21:54.354846+00:00]>
**One** ``str``, ``float``, or ``int``, convertible to a floating-point timestamp, to get
that timestamp in UTC::
>>> arrow.get(1367992474.293378)
<Arrow [2013-05-08T05:54:34.293378+00:00]>
>>> arrow.get(1367992474)
<Arrow [2013-05-08T05:54:34+00:00]>
>>> arrow.get('1367992474.293378')
<Arrow [2013-05-08T05:54:34.293378+00:00]>
>>> arrow.get('1367992474')
<Arrow [2013-05-08T05:54:34+00:00]>
**One** ISO-8601-formatted ``str``, to parse it::
>>> arrow.get('2013-09-29T01:26:43.830580')
<Arrow [2013-09-29T01:26:43.830580+00:00]>
**One** ``tzinfo``, to get the current time **converted** to that timezone::
>>> arrow.get(tz.tzlocal())
<Arrow [2013-05-07T22:57:28.484717-07:00]>
**One** naive ``datetime``, to get that datetime in UTC::
>>> arrow.get(datetime(2013, 5, 5))
<Arrow [2013-05-05T00:00:00+00:00]>
**One** aware ``datetime``, to get that datetime::
>>> arrow.get(datetime(2013, 5, 5, tzinfo=tz.tzlocal()))
<Arrow [2013-05-05T00:00:00-07:00]>
**One** naive ``date``, to get that date in UTC::
>>> arrow.get(date(2013, 5, 5))
<Arrow [2013-05-05T00:00:00+00:00]>
**Two** arguments, a naive or aware ``datetime``, and a replacement
:ref:`timezone expression <tz-expr>`::
>>> arrow.get(datetime(2013, 5, 5), 'US/Pacific')
<Arrow [2013-05-05T00:00:00-07:00]>
**Two** arguments, a naive ``date``, and a replacement
:ref:`timezone expression <tz-expr>`::
>>> arrow.get(date(2013, 5, 5), 'US/Pacific')
<Arrow [2013-05-05T00:00:00-07:00]>
**Two** arguments, both ``str``, to parse the first according to the format of the second::
>>> arrow.get('2013-05-05 12:30:45', 'YYYY-MM-DD HH:mm:ss')
<Arrow [2013-05-05T12:30:45+00:00]>
**Two** arguments, first a ``str`` to parse and second a ``list`` of formats to try::
>>> arrow.get('2013-05-05 12:30:45', ['MM/DD/YYYY', 'YYYY-MM-DD HH:mm:ss'])
<Arrow [2013-05-05T12:30:45+00:00]>
**Three or more** arguments, as for the constructor of a ``datetime``::
>>> arrow.get(2013, 5, 5, 12, 30, 45)
<Arrow [2013-05-05T12:30:45+00:00]>
**One** ``time.struct_time``::
>>> arrow.get(gmtime(0))
<Arrow [1970-01-01T00:00:00+00:00]>
'''
arg_count = len(args)
locale = kwargs.get('locale', 'en_us')
tz = kwargs.get('tzinfo', None)
# () -> now, @ utc.
if arg_count == 0:
if isinstance(tz, tzinfo):
return self.type.now(tz)
return self.type.utcnow()
if arg_count == 1:
arg = args[0]
# (None) -> now, @ utc.
if arg is None:
return self.type.utcnow()
# try (int, float, str(int), str(float)) -> utc, from timestamp.
if is_timestamp(arg):
return self.type.utcfromtimestamp(arg)
# (Arrow) -> from the object's datetime.
if isinstance(arg, Arrow):
return self.type.fromdatetime(arg.datetime)
# (datetime) -> from datetime.
if isinstance(arg, datetime):
return self.type.fromdatetime(arg)
# (date) -> from date.
if isinstance(arg, date):
return self.type.fromdate(arg)
# (tzinfo) -> now, @ tzinfo.
elif isinstance(arg, tzinfo):
return self.type.now(arg)
# (str) -> parse.
elif isstr(arg):
dt = parser.DateTimeParser(locale).parse_iso(arg)
return self.type.fromdatetime(dt)
# (struct_time) -> from struct_time
elif isinstance(arg, struct_time):
return self.type.utcfromtimestamp(calendar.timegm(arg))
else:
raise TypeError('Can\'t parse single argument type of \'{0}\''.format(type(arg)))
elif arg_count == 2:
arg_1, arg_2 = args[0], args[1]
if isinstance(arg_1, datetime):
# (datetime, tzinfo/str) -> fromdatetime replace tzinfo.
if isinstance(arg_2, tzinfo) or isstr(arg_2):
return self.type.fromdatetime(arg_1, arg_2)
else:
raise TypeError('Can\'t parse two arguments of types \'datetime\', \'{0}\''.format(
type(arg_2)))
elif isinstance(arg_1, date):
# (date, tzinfo/str) -> fromdate replace tzinfo.
if isinstance(arg_2, tzinfo) or isstr(arg_2):
return self.type.fromdate(arg_1, tzinfo=arg_2)
else:
raise TypeError('Can\'t parse two arguments of types \'date\', \'{0}\''.format(
type(arg_2)))
# (str, format) -> parse.
elif isstr(arg_1) and (isstr(arg_2) or isinstance(arg_2, list)):
dt = parser.DateTimeParser(locale).parse(args[0], args[1])
return self.type.fromdatetime(dt, tzinfo=tz)
else:
raise TypeError('Can\'t parse two arguments of types \'{0}\', \'{1}\''.format(
type(arg_1), type(arg_2)))
# 3+ args -> datetime-like via constructor.
else:
return self.type(*args, **kwargs)
def utcnow(self):
'''Returns an :class:`Arrow <arrow.arrow.Arrow>` object, representing "now" in UTC time.
Usage::
>>> import arrow
>>> arrow.utcnow()
<Arrow [2013-05-08T05:19:07.018993+00:00]>
'''
return self.type.utcnow()
def now(self, tz=None):
'''Returns an :class:`Arrow <arrow.arrow.Arrow>` object, representing "now" in the given
timezone.
:param tz: (optional) A :ref:`timezone expression <tz-expr>`. Defaults to local time.
Usage::
>>> import arrow
>>> arrow.now()
<Arrow [2013-05-07T22:19:11.363410-07:00]>
>>> arrow.now('US/Pacific')
<Arrow [2013-05-07T22:19:15.251821-07:00]>
>>> arrow.now('+02:00')
<Arrow [2013-05-08T07:19:25.618646+02:00]>
>>> arrow.now('local')
<Arrow [2013-05-07T22:19:39.130059-07:00]>
'''
if tz is None:
tz = dateutil_tz.tzlocal()
elif not isinstance(tz, tzinfo):
tz = parser.TzinfoParser.parse(tz)
return self.type.now(tz)

View file

@@ -1,105 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import calendar
import re
from dateutil import tz as dateutil_tz
from arrow import util, locales
class DateTimeFormatter(object):
_FORMAT_RE = re.compile('(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?dd?d?|HH?|hh?|mm?|ss?|SS?S?S?S?S?|ZZ?|a|A|X)')
def __init__(self, locale='en_us'):
self.locale = locales.get_locale(locale)
def format(cls, dt, fmt):
return cls._FORMAT_RE.sub(lambda m: cls._format_token(dt, m.group(0)), fmt)
def _format_token(self, dt, token):
if token == 'YYYY':
return self.locale.year_full(dt.year)
if token == 'YY':
return self.locale.year_abbreviation(dt.year)
if token == 'MMMM':
return self.locale.month_name(dt.month)
if token == 'MMM':
return self.locale.month_abbreviation(dt.month)
if token == 'MM':
return '{0:02d}'.format(dt.month)
if token == 'M':
return str(dt.month)
if token == 'DDDD':
return '{0:03d}'.format(dt.timetuple().tm_yday)
if token == 'DDD':
return str(dt.timetuple().tm_yday)
if token == 'DD':
return '{0:02d}'.format(dt.day)
if token == 'D':
return str(dt.day)
if token == 'Do':
return self.locale.ordinal_number(dt.day)
if token == 'dddd':
return self.locale.day_name(dt.isoweekday())
if token == 'ddd':
return self.locale.day_abbreviation(dt.isoweekday())
if token == 'd':
return str(dt.isoweekday())
if token == 'HH':
return '{0:02d}'.format(dt.hour)
if token == 'H':
return str(dt.hour)
if token == 'hh':
return '{0:02d}'.format(dt.hour if 0 < dt.hour < 13 else abs(dt.hour - 12))
if token == 'h':
return str(dt.hour if 0 < dt.hour < 13 else abs(dt.hour - 12))
if token == 'mm':
return '{0:02d}'.format(dt.minute)
if token == 'm':
return str(dt.minute)
if token == 'ss':
return '{0:02d}'.format(dt.second)
if token == 's':
return str(dt.second)
if token == 'SSSSSS':
return str('{0:06d}'.format(int(dt.microsecond)))
if token == 'SSSSS':
return str('{0:05d}'.format(int(dt.microsecond / 10)))
if token == 'SSSS':
return str('{0:04d}'.format(int(dt.microsecond / 100)))
if token == 'SSS':
return str('{0:03d}'.format(int(dt.microsecond / 1000)))
if token == 'SS':
return str('{0:02d}'.format(int(dt.microsecond / 10000)))
if token == 'S':
return str(int(dt.microsecond / 100000))
if token == 'X':
return str(calendar.timegm(dt.utctimetuple()))
if token in ['ZZ', 'Z']:
separator = ':' if token == 'ZZ' else ''
tz = dateutil_tz.tzutc() if dt.tzinfo is None else dt.tzinfo
total_minutes = int(util.total_seconds(tz.utcoffset(dt)) / 60)
sign = '+' if total_minutes >= 0 else '-'
total_minutes = abs(total_minutes)
hour, minute = divmod(total_minutes, 60)
return '{0}{1:02d}{2}{3:02d}'.format(sign, hour, separator, minute)
if token in ('a', 'A'):
return self.locale.meridian(dt.hour, token)
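An illustrative sketch of the token handling above (not from the original file): formatting a fixed naive datetime with the 'en_us' locale; the 'ZZ' token falls back to UTC because the datetime carries no tzinfo.
# Hypothetical sketch of DateTimeFormatter.format with a token string.
from datetime import datetime
from arrow.formatter import DateTimeFormatter

formatter = DateTimeFormatter('en_us')
dt = datetime(2013, 5, 11, 13, 23, 58)
print(formatter.format(dt, 'YYYY-MM-DD HH:mm:ss ZZ'))  # '2013-05-11 13:23:58 +00:00'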

File diff suppressed because it is too large Load diff

View file

@@ -1,344 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
from datetime import datetime
from dateutil import tz
import re
try:
from functools import lru_cache
except ImportError: # pragma: no cover
from backports.functools_lru_cache import lru_cache # pragma: no cover
from arrow import locales
class ParserError(RuntimeError):
pass
class DateTimeParser(object):
_FORMAT_RE = re.compile('(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|X)')
_ESCAPE_RE = re.compile('\[[^\[\]]*\]')
_ONE_OR_MORE_DIGIT_RE = re.compile('\d+')
_ONE_OR_TWO_DIGIT_RE = re.compile('\d{1,2}')
_FOUR_DIGIT_RE = re.compile('\d{4}')
_TWO_DIGIT_RE = re.compile('\d{2}')
_TZ_RE = re.compile('[+\-]?\d{2}:?(\d{2})?')
_TZ_NAME_RE = re.compile('\w[\w+\-/]+')
_BASE_INPUT_RE_MAP = {
'YYYY': _FOUR_DIGIT_RE,
'YY': _TWO_DIGIT_RE,
'MM': _TWO_DIGIT_RE,
'M': _ONE_OR_TWO_DIGIT_RE,
'DD': _TWO_DIGIT_RE,
'D': _ONE_OR_TWO_DIGIT_RE,
'HH': _TWO_DIGIT_RE,
'H': _ONE_OR_TWO_DIGIT_RE,
'hh': _TWO_DIGIT_RE,
'h': _ONE_OR_TWO_DIGIT_RE,
'mm': _TWO_DIGIT_RE,
'm': _ONE_OR_TWO_DIGIT_RE,
'ss': _TWO_DIGIT_RE,
's': _ONE_OR_TWO_DIGIT_RE,
'X': re.compile('\d+'),
'ZZZ': _TZ_NAME_RE,
'ZZ': _TZ_RE,
'Z': _TZ_RE,
'S': _ONE_OR_MORE_DIGIT_RE,
}
MARKERS = ['YYYY', 'MM', 'DD']
SEPARATORS = ['-', '/', '.']
def __init__(self, locale='en_us', cache_size=0):
self.locale = locales.get_locale(locale)
self._input_re_map = self._BASE_INPUT_RE_MAP.copy()
self._input_re_map.update({
'MMMM': self._choice_re(self.locale.month_names[1:], re.IGNORECASE),
'MMM': self._choice_re(self.locale.month_abbreviations[1:],
re.IGNORECASE),
'Do': re.compile(self.locale.ordinal_day_re),
'dddd': self._choice_re(self.locale.day_names[1:], re.IGNORECASE),
'ddd': self._choice_re(self.locale.day_abbreviations[1:],
re.IGNORECASE),
'd': re.compile(r"[1-7]"),
'a': self._choice_re(
(self.locale.meridians['am'], self.locale.meridians['pm'])
),
# note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
# ensure backwards compatibility of this token
'A': self._choice_re(self.locale.meridians.values())
})
if cache_size > 0:
self._generate_pattern_re =\
lru_cache(maxsize=cache_size)(self._generate_pattern_re)
def parse_iso(self, string):
has_time = 'T' in string or ' ' in string.strip()
space_divider = ' ' in string.strip()
if has_time:
if space_divider:
date_string, time_string = string.split(' ', 1)
else:
date_string, time_string = string.split('T', 1)
time_parts = re.split('[+-]', time_string, 1)
has_tz = len(time_parts) > 1
has_seconds = time_parts[0].count(':') > 1
has_subseconds = re.search('[.,]', time_parts[0])
if has_subseconds:
formats = ['YYYY-MM-DDTHH:mm:ss%sS' % has_subseconds.group()]
elif has_seconds:
formats = ['YYYY-MM-DDTHH:mm:ss']
else:
formats = ['YYYY-MM-DDTHH:mm']
else:
has_tz = False
# generate required formats: YYYY-MM-DD, YYYY-MM, YYYY
# using various separators: -, /, .
l = len(self.MARKERS)
formats = [separator.join(self.MARKERS[:l-i])
for i in range(l)
for separator in self.SEPARATORS]
if has_time and has_tz:
formats = [f + 'Z' for f in formats]
if space_divider:
formats = [item.replace('T', ' ', 1) for item in formats]
return self._parse_multiformat(string, formats)
def _generate_pattern_re(self, fmt):
# fmt is a string of tokens like 'YYYY-MM-DD'
# we construct a new string by replacing each
# token by its pattern:
# 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'
tokens = []
offset = 0
# Extract the bracketed expressions to be reinserted later.
escaped_fmt = re.sub(self._ESCAPE_RE, "#", fmt)
# Any number of S is the same as one.
escaped_fmt = re.sub('S+', 'S', escaped_fmt)
escaped_data = re.findall(self._ESCAPE_RE, fmt)
fmt_pattern = escaped_fmt
for m in self._FORMAT_RE.finditer(escaped_fmt):
token = m.group(0)
try:
input_re = self._input_re_map[token]
except KeyError:
raise ParserError('Unrecognized token \'{0}\''.format(token))
input_pattern = '(?P<{0}>{1})'.format(token, input_re.pattern)
tokens.append(token)
# a pattern doesn't have the same length as the token
# it replaces! We keep the difference in the offset variable.
# This works because the string is scanned left-to-right and matches
# are returned in the order found by finditer.
fmt_pattern = fmt_pattern[:m.start() + offset] + input_pattern + fmt_pattern[m.end() + offset:]
offset += len(input_pattern) - (m.end() - m.start())
final_fmt_pattern = ""
a = fmt_pattern.split("#")
b = escaped_data
# Due to the way Python splits, 'a' will always be longer
for i in range(len(a)):
final_fmt_pattern += a[i]
if i < len(b):
final_fmt_pattern += b[i][1:-1]
return tokens, re.compile(final_fmt_pattern, flags=re.IGNORECASE)
def parse(self, string, fmt):
if isinstance(fmt, list):
return self._parse_multiformat(string, fmt)
fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
match = fmt_pattern_re.search(string)
if match is None:
raise ParserError('Failed to match \'{0}\' when parsing \'{1}\''
.format(fmt_pattern_re.pattern, string))
parts = {}
for token in fmt_tokens:
if token == 'Do':
value = match.group('value')
else:
value = match.group(token)
self._parse_token(token, value, parts)
return self._build_datetime(parts)
def _parse_token(self, token, value, parts):
if token == 'YYYY':
parts['year'] = int(value)
elif token == 'YY':
value = int(value)
parts['year'] = 1900 + value if value > 68 else 2000 + value
elif token in ['MMMM', 'MMM']:
parts['month'] = self.locale.month_number(value.lower())
elif token in ['MM', 'M']:
parts['month'] = int(value)
elif token in ['DD', 'D']:
parts['day'] = int(value)
elif token in ['Do']:
parts['day'] = int(value)
elif token.upper() in ['HH', 'H']:
parts['hour'] = int(value)
elif token in ['mm', 'm']:
parts['minute'] = int(value)
elif token in ['ss', 's']:
parts['second'] = int(value)
elif token == 'S':
# We have the *most significant* digits of an arbitrary-precision integer.
# We want the six most significant digits as an integer, rounded.
# FIXME: add nanosecond support somehow?
value = value.ljust(7, str('0'))
# floating-point (IEEE-754) defaults to half-to-even rounding
seventh_digit = int(value[6])
if seventh_digit == 5:
rounding = int(value[5]) % 2
elif seventh_digit > 5:
rounding = 1
else:
rounding = 0
parts['microsecond'] = int(value[:6]) + rounding
elif token == 'X':
parts['timestamp'] = int(value)
elif token in ['ZZZ', 'ZZ', 'Z']:
parts['tzinfo'] = TzinfoParser.parse(value)
elif token in ['a', 'A']:
if value in (
self.locale.meridians['am'],
self.locale.meridians['AM']
):
parts['am_pm'] = 'am'
elif value in (
self.locale.meridians['pm'],
self.locale.meridians['PM']
):
parts['am_pm'] = 'pm'
@staticmethod
def _build_datetime(parts):
timestamp = parts.get('timestamp')
if timestamp:
tz_utc = tz.tzutc()
return datetime.fromtimestamp(timestamp, tz=tz_utc)
am_pm = parts.get('am_pm')
hour = parts.get('hour', 0)
if am_pm == 'pm' and hour < 12:
hour += 12
elif am_pm == 'am' and hour == 12:
hour = 0
return datetime(year=parts.get('year', 1), month=parts.get('month', 1),
day=parts.get('day', 1), hour=hour, minute=parts.get('minute', 0),
second=parts.get('second', 0), microsecond=parts.get('microsecond', 0),
tzinfo=parts.get('tzinfo'))
def _parse_multiformat(self, string, formats):
_datetime = None
for fmt in formats:
try:
_datetime = self.parse(string, fmt)
break
except ParserError:
pass
if _datetime is None:
raise ParserError('Could not match input to any of {0} on \'{1}\''.format(formats, string))
return _datetime
@staticmethod
def _map_lookup(input_map, key):
try:
return input_map[key]
except KeyError:
raise ParserError('Could not match "{0}" to {1}'.format(key, input_map))
@staticmethod
def _try_timestamp(string):
try:
return float(string)
except:
return None
@staticmethod
def _choice_re(choices, flags=0):
return re.compile('({0})'.format('|'.join(choices)), flags=flags)
class TzinfoParser(object):
_TZINFO_RE = re.compile('([+\-])?(\d\d):?(\d\d)?')
@classmethod
def parse(cls, string):
tzinfo = None
if string == 'local':
tzinfo = tz.tzlocal()
elif string in ['utc', 'UTC']:
tzinfo = tz.tzutc()
else:
iso_match = cls._TZINFO_RE.match(string)
if iso_match:
sign, hours, minutes = iso_match.groups()
if minutes is None:
minutes = 0
seconds = int(hours) * 3600 + int(minutes) * 60
if sign == '-':
seconds *= -1
tzinfo = tz.tzoffset(None, seconds)
else:
tzinfo = tz.gettz(string)
if tzinfo is None:
raise ParserError('Could not parse timezone expression "{0}"'.format(string))
return tzinfo
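An illustrative sketch of the two parser classes above (not part of the original file): parsing with an explicit token format, and parsing a timezone expression on its own.
# Hypothetical sketch; parse() returns a naive datetime here because the
# format string carries no timezone token.
from arrow.parser import DateTimeParser, TzinfoParser

parser = DateTimeParser('en_us')
print(parser.parse('2013-05-05 12:30:45', 'YYYY-MM-DD HH:mm:ss'))  # 2013-05-05 12:30:45
print(TzinfoParser.parse('-07:00'))  # e.g. tzoffset(None, -25200)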

View file

@@ -1,47 +0,0 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import sys
# python 2.6 / 2.7 definitions for total_seconds function.
def _total_seconds_27(td): # pragma: no cover
return td.total_seconds()
def _total_seconds_26(td):
return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 1e6) / 1e6
# get version info and assign correct total_seconds function.
version = '{0}.{1}.{2}'.format(*sys.version_info[:3])
if version < '2.7': # pragma: no cover
total_seconds = _total_seconds_26
else: # pragma: no cover
total_seconds = _total_seconds_27
def is_timestamp(value):
if type(value) == bool:
return False
try:
float(value)
return True
except:
return False
# python 2.7 / 3.0+ definitions for isstr function.
try: # pragma: no cover
basestring
def isstr(s):
return isinstance(s, basestring)
except NameError: #pragma: no cover
def isstr(s):
return isinstance(s, str)
__all__ = ['total_seconds', 'is_timestamp', 'isstr']

View file

@@ -1,39 +0,0 @@
######################## BEGIN LICENSE BLOCK ########################
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .compat import PY2, PY3
from .universaldetector import UniversalDetector
from .version import __version__, VERSION
def detect(byte_str):
"""
Detect the encoding of the given byte string.
:param byte_str: The byte sequence to examine.
:type byte_str: ``bytes`` or ``bytearray``
"""
if not isinstance(byte_str, bytearray):
if not isinstance(byte_str, bytes):
raise TypeError('Expected object of type bytes or bytearray, got: '
'{0}'.format(type(byte_str)))
else:
byte_str = bytearray(byte_str)
detector = UniversalDetector()
detector.feed(byte_str)
return detector.close()
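An illustrative usage sketch of the `detect` helper above (not part of the original file); the exact confidence value depends on the input bytes:
# Hypothetical sketch of chardet.detect on a short UTF-8 byte string.
import chardet

result = chardet.detect('Příliš žluťoučký kůň'.encode('utf-8'))
print(result)  # e.g. {'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}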

View file

@@ -1,386 +0,0 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
# Big5 frequency table
# by Taiwan's Mandarin Promotion Council
# <http://www.edu.tw:81/mandr/>
#
# 128 --> 0.42261
# 256 --> 0.57851
# 512 --> 0.74851
# 1024 --> 0.89384
# 2048 --> 0.97583
#
# Ideal Distribution Ratio = 0.74851/(1-0.74851) = 2.98
# Random Distribution Ratio = 512/(5401-512) = 0.105
#
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
#Char to FreqOrder table
BIG5_TABLE_SIZE = 5376
BIG5_CHAR_TO_FREQ_ORDER = (
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48
63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64
3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80
4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96
5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112
630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128
179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144
995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160
2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176
1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192
3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208
706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 224
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240
3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256
2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272
437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288
3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304
1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320
5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336
266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352
5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368
1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384
32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400
188, 109, 499,5032,3686,1717,1790, 888,1217,3040,4520,5033,3565,5034,3352,1520, # 416
3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432
3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448
324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464
2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480
2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496
314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 757,1162,1879,1616,3459, # 512
287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528
3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544
1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560
1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576
1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, # 592
2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608
265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624
4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640
1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656
5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672
2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688
383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704
98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720
523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736
710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752
5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768
379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784
1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800
585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816
690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832
5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848
1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864
544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880
3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896
4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912
3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928
279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944
610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 338,1282,3096, 693,2871,1411, # 960
1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976
4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992
3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008
3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024
2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040
5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 1056
3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072
5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088
1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104
2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120
1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136
78,3829,3830, 267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152
1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168
4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184
3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200
534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216
165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232
626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248
2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264
5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280
1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296
2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312
1444, 306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328
1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344
5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360
5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376
5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392
3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408
4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424
4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440
2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456
5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472
3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488
598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504
5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520
5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536
1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552
2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568
3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584
4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600
5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616
3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632
4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648
1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664
1871,2762,3004,5187, 435,5188, 343,1108, 596, 17,1751,4579,2239,3477,3709,5189, # 1680
4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696
1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712
240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728
1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744
1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760
3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776
619,5202,3154,3294, 215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792
5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808
2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824
1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840
1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856
5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872
829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888
4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904
375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920
2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936
444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952
1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968
1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984
730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000
4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016
4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032
1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048
3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064
5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080
5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096
1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112
2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128
1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144
3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160
2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176
3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192
2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208
4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224
4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240
3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256
97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272
3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288
424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304
3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320
4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336
3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352
1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368
5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384
199,2883,4038,5284,3606,2727,1753, 761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400
5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416
1702,1226, 102,1547, 62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432
391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448
4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464
4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480
397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496
2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512
2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528
3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544
1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560
4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576
2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592
1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608
1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624
2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640
3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656
1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672
5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688
1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704
4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720
1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736
135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 2752
1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768
4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784
4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800
2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816
1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832
4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848
660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864
5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880
2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896
3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912
4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928
790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944
5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960
5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976
1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992
4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008
4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024
2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040
3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056
3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072
2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088
1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104
4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120
3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136
3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152
2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168
4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184
5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200
3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216
2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232
3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248
1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264
2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280
3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296
4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312
2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328
2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344
5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360
1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376
2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392
1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408
3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424
4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440
2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456
3245, 981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472
3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488
2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504
4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520
2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536
3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552
4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568
5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584
3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600
194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616
1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632
4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648
1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664
4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680
5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696
510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712
5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728
5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744
2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760
3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776
2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792
2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808
681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824
1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840
4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856
3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872
3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888
838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904
2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920
625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936
2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952
4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968
1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984
4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000
1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016
3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032
574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048
3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064
5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080
5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096
3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112
3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128
1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144
2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160
5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176
1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192
1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208
3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224
919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240
1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 4256
4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272
5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288
2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304
3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320
516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 4336
1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 4352
2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368
2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384
5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400
5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416
5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432
2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448
2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464
1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480
4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496
3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512
3415,4749,2437, 451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528
4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544
4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560
2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576
2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592
5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608
4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624
5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640
4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656
502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672
121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688
1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704
3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720
4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736
1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752
5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768
2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784
2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800
3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816
5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832
1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848
3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864
5673,5674,3928,2466,5675, 658,1103, 225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880
1243,5680,4142, 963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896
5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912
2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928
3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944
2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960
3931, 533,4417,3660,2184, 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976
3932,1988, 618, 427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992
3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008
4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024
803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040
2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056
4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072
3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088
5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104
1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120
5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136
425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152
1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168
479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184
4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200
1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216
4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232
1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248
433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264
3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280
4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296
5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312
938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328
3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376
)

View file

@@ -1,47 +0,0 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import Big5DistributionAnalysis
from .mbcssm import BIG5_SM_MODEL
class Big5Prober(MultiByteCharSetProber):
def __init__(self):
super(Big5Prober, self).__init__()
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
self.distribution_analyzer = Big5DistributionAnalysis()
self.reset()
@property
def charset_name(self):
return "Big5"
@property
def language(self):
return "Chinese"

View file

@@ -1,233 +0,0 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE,
EUCTW_TYPICAL_DISTRIBUTION_RATIO)
from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE,
EUCKR_TYPICAL_DISTRIBUTION_RATIO)
from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE,
GB2312_TYPICAL_DISTRIBUTION_RATIO)
from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE,
BIG5_TYPICAL_DISTRIBUTION_RATIO)
from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE,
JIS_TYPICAL_DISTRIBUTION_RATIO)
class CharDistributionAnalysis(object):
ENOUGH_DATA_THRESHOLD = 1024
SURE_YES = 0.99
SURE_NO = 0.01
MINIMUM_DATA_THRESHOLD = 3
def __init__(self):
# Mapping table to get frequency order from char order (get from
# GetOrder())
self._char_to_freq_order = None
self._table_size = None # Size of above table
# This is a constant value which varies from language to language,
# used in calculating confidence. See
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
# for further detail.
self.typical_distribution_ratio = None
self._done = None
self._total_chars = None
self._freq_chars = None
self.reset()
def reset(self):
"""reset analyser, clear any state"""
# If this flag is set to True, detection is done and conclusion has
# been made
self._done = False
self._total_chars = 0 # Total characters encountered
# The number of characters whose frequency order is less than 512
self._freq_chars = 0
def feed(self, char, char_len):
"""feed a character with known length"""
if char_len == 2:
# we only care about 2-byte characters in our distribution analysis
order = self.get_order(char)
else:
order = -1
if order >= 0:
self._total_chars += 1
# order is valid
if order < self._table_size:
if 512 > self._char_to_freq_order[order]:
self._freq_chars += 1
def get_confidence(self):
"""return confidence based on existing data"""
# if we didn't receive any character in our consideration range,
# return negative answer
if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD:
return self.SURE_NO
if self._total_chars != self._freq_chars:
r = (self._freq_chars / ((self._total_chars - self._freq_chars)
* self.typical_distribution_ratio))
if r < self.SURE_YES:
return r
# normalize confidence (we don't want to be 100% sure)
return self.SURE_YES
def got_enough_data(self):
# It is not necessary to receive all data to draw conclusion.
# For charset detection, certain amount of data is enough
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
def get_order(self, byte_str):
# We do not handle characters based on the original encoding string,
# but convert this encoding string to a number, here called order.
# This allows multiple encodings of a language to share one frequency
# table.
return -1
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
super(EUCTWDistributionAnalysis, self).__init__()
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
self._table_size = EUCTW_TABLE_SIZE
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
# for euc-TW encoding, we are interested
# first byte range: 0xc4 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char = byte_str[0]
if first_char >= 0xC4:
return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1
else:
return -1
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
super(EUCKRDistributionAnalysis, self).__init__()
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
self._table_size = EUCKR_TABLE_SIZE
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
# for euc-KR encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char = byte_str[0]
if first_char >= 0xB0:
return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1
else:
return -1
class GB2312DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
super(GB2312DistributionAnalysis, self).__init__()
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
self._table_size = GB2312_TABLE_SIZE
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
# for GB2312 encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char, second_char = byte_str[0], byte_str[1]
if (first_char >= 0xB0) and (second_char >= 0xA1):
return 94 * (first_char - 0xB0) + second_char - 0xA1
else:
return -1
class Big5DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
super(Big5DistributionAnalysis, self).__init__()
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
self._table_size = BIG5_TABLE_SIZE
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
# for big5 encoding, we are interested
# first byte range: 0xa4 -- 0xfe
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
# no validation needed here. State machine has done that
first_char, second_char = byte_str[0], byte_str[1]
if first_char >= 0xA4:
if second_char >= 0xA1:
return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
else:
return 157 * (first_char - 0xA4) + second_char - 0x40
else:
return -1
class SJISDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
super(SJISDistributionAnalysis, self).__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
# for sjis encoding, we are interested
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
# second byte range: 0x40 -- 0x7e, 0x81 -- 0xfe
# no validation needed here. State machine has done that
first_char, second_char = byte_str[0], byte_str[1]
if (first_char >= 0x81) and (first_char <= 0x9F):
order = 188 * (first_char - 0x81)
elif (first_char >= 0xE0) and (first_char <= 0xEF):
order = 188 * (first_char - 0xE0 + 31)
else:
return -1
order = order + second_char - 0x40
if second_char > 0x7F:
order = -1
return order
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
super(EUCJPDistributionAnalysis, self).__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
# for euc-JP encoding, we are interested
# first byte range: 0xa0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
char = byte_str[0]
if char >= 0xA0:
return 94 * (char - 0xA1) + byte_str[1] - 0xa1
else:
return -1
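The confidence formula in get_confidence() above reduces to r = freq_chars / ((total_chars - freq_chars) * typical_distribution_ratio), capped at SURE_YES. A small self-contained illustration with made-up counts (600 frequent characters out of 1000, with 0.75 standing in for a language's typical ratio):
# Illustration only: the counts below are invented, 0.75 stands in for a
# language's typical_distribution_ratio, and 0.99 is the SURE_YES cap.
freq_chars = 600
total_chars = 1000
typical_distribution_ratio = 0.75

r = freq_chars / ((total_chars - freq_chars) * typical_distribution_ratio)
confidence = min(r, 0.99)
print(confidence)  # 0.99 -- r evaluates to 2.0, so the cap applies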

View file

@@ -1,106 +0,0 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .enums import ProbingState
from .charsetprober import CharSetProber
class CharSetGroupProber(CharSetProber):
def __init__(self, lang_filter=None):
super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
self._active_num = 0
self.probers = []
self._best_guess_prober = None
def reset(self):
super(CharSetGroupProber, self).reset()
self._active_num = 0
for prober in self.probers:
if prober:
prober.reset()
prober.active = True
self._active_num += 1
self._best_guess_prober = None
@property
def charset_name(self):
if not self._best_guess_prober:
self.get_confidence()
if not self._best_guess_prober:
return None
return self._best_guess_prober.charset_name
@property
def language(self):
if not self._best_guess_prober:
self.get_confidence()
if not self._best_guess_prober:
return None
return self._best_guess_prober.language
def feed(self, byte_str):
for prober in self.probers:
if not prober:
continue
if not prober.active:
continue
state = prober.feed(byte_str)
if not state:
continue
if state == ProbingState.FOUND_IT:
self._best_guess_prober = prober
return self.state
elif state == ProbingState.NOT_ME:
prober.active = False
self._active_num -= 1
if self._active_num <= 0:
self._state = ProbingState.NOT_ME
return self.state
return self.state
def get_confidence(self):
state = self.state
if state == ProbingState.FOUND_IT:
return 0.99
elif state == ProbingState.NOT_ME:
return 0.01
best_conf = 0.0
self._best_guess_prober = None
for prober in self.probers:
if not prober:
continue
if not prober.active:
self.logger.debug('%s not active', prober.charset_name)
continue
conf = prober.get_confidence()
self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
if best_conf < conf:
best_conf = conf
self._best_guess_prober = prober
if not self._best_guess_prober:
return 0.0
return best_conf
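get_confidence() above is essentially an argmax over the still-active child probers. A self-contained sketch of that selection loop; DummyProber is a hypothetical stand-in that carries only the attributes the loop reads:
# Self-contained sketch of the best-guess selection; DummyProber is
# hypothetical and mimics only what the loop above touches.
class DummyProber(object):
    def __init__(self, name, confidence, active=True):
        self.charset_name = name
        self.active = active
        self._confidence = confidence

    def get_confidence(self):
        return self._confidence

probers = [DummyProber("Big5", 0.42), DummyProber("EUC-TW", 0.17, active=False)]
best_conf, best_guess = 0.0, None
for prober in probers:
    if not prober or not prober.active:
        continue
    conf = prober.get_confidence()
    if best_conf < conf:
        best_conf, best_guess = conf, prober
print(best_guess.charset_name, best_conf)  # Big5 0.42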

View file

@@ -1,145 +0,0 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import logging
import re
from .enums import ProbingState
class CharSetProber(object):
SHORTCUT_THRESHOLD = 0.95
def __init__(self, lang_filter=None):
self._state = None
self.lang_filter = lang_filter
self.logger = logging.getLogger(__name__)
def reset(self):
self._state = ProbingState.DETECTING
@property
def charset_name(self):
return None
def feed(self, buf):
pass
@property
def state(self):
return self._state
def get_confidence(self):
return 0.0
@staticmethod
def filter_high_byte_only(buf):
buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
return buf
@staticmethod
def filter_international_words(buf):
"""
We define three types of bytes:
alphabet: English letters [a-zA-Z]
international: international characters [\x80-\xFF]
marker: everything else [^a-zA-Z\x80-\xFF]
The input buffer can be thought to contain a series of words delimited
by markers. This function works to filter all words that contain at
least one international character. All contiguous sequences of markers
are replaced by a single space ascii character.
This filter applies to all scripts which do not use English characters.
"""
filtered = bytearray()
# This regex expression filters out only words that have at-least one
# international character. The word may include one marker character at
# the end.
words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?',
buf)
for word in words:
filtered.extend(word[:-1])
# If the last character in the word is a marker, replace it with a
# space as markers shouldn't affect our analysis (they are used
# similarly across all languages and may thus have similar
# frequencies).
last_char = word[-1:]
if not last_char.isalpha() and last_char < b'\x80':
last_char = b' '
filtered.extend(last_char)
return filtered
@staticmethod
def filter_with_english_letters(buf):
"""
Returns a copy of ``buf`` that retains only the sequences of English
alphabet and high byte characters that are not between <> characters.
Also retains English alphabet and high byte characters immediately
before occurrences of >.
This filter can be applied to all scripts which contain both English
characters and extended ASCII characters, but is currently only used by
``Latin1Prober``.
"""
filtered = bytearray()
in_tag = False
prev = 0
for curr in range(len(buf)):
# Slice here to get bytes instead of an int with Python 3
buf_char = buf[curr:curr + 1]
# Check if we're coming out of or entering an HTML tag
if buf_char == b'>':
in_tag = False
elif buf_char == b'<':
in_tag = True
# If current character is not extended-ASCII and not alphabetic...
if buf_char < b'\x80' and not buf_char.isalpha():
# ...and we're not in a tag
if curr > prev and not in_tag:
# Keep everything after last non-extended-ASCII,
# non-alphabetic character
filtered.extend(buf[prev:curr])
# Output a space to delimit stretch we kept
filtered.extend(b' ')
prev = curr + 1
# If we're not in a tag...
if not in_tag:
# Keep everything after last non-extended-ASCII, non-alphabetic
# character
filtered.extend(buf[prev:])
return filtered
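The two static filters above can be called on their own, which makes their effect easy to see. A short sketch, assuming the upstream chardet package exposes the same static helpers as this vendored copy; the byte string is an arbitrary example:
# Sketch, assuming upstream chardet exposes the same static helpers.
from chardet.charsetprober import CharSetProber

buf = b"hello \xc3\xa9t\xc3\xa9 <b>tag</b> world"
print(CharSetProber.filter_high_byte_only(buf))
# b' \xc3\xa9 \xc3\xa9 '  -- every run of ASCII bytes collapses to one space
print(CharSetProber.filter_international_words(buf))
# keeps only the words containing at least one byte >= 0x80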

View file

@@ -1 +0,0 @@

View file

@@ -1,85 +0,0 @@
#!/usr/bin/env python
"""
Script which takes one or more file paths and reports on their detected
encodings
Example::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
If no paths are provided, it takes its input from stdin.
"""
from __future__ import absolute_import, print_function, unicode_literals
import argparse
import sys
from chardet import __version__
from chardet.compat import PY2
from chardet.universaldetector import UniversalDetector
def description_of(lines, name='stdin'):
"""
Return a string describing the probable encoding of a file or
list of strings.
:param lines: The lines to get the encoding of.
:type lines: Iterable of bytes
:param name: Name of file or collection of lines
:type name: str
"""
u = UniversalDetector()
for line in lines:
line = bytearray(line)
u.feed(line)
# shortcut out of the loop to save reading further - particularly useful if we read a BOM.
if u.done:
break
u.close()
result = u.result
if PY2:
name = name.decode(sys.getfilesystemencoding(), 'ignore')
if result['encoding']:
return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
result['confidence'])
else:
return '{0}: no result'.format(name)
def main(argv=None):
"""
Handles command line arguments and gets things started.
:param argv: List of arguments, as if specified on the command-line.
If None, ``sys.argv[1:]`` is used instead.
:type argv: list of str
"""
# Get command line arguments
parser = argparse.ArgumentParser(
description="Takes one or more file paths and reports their detected \
encodings")
parser.add_argument('input',
help='File whose encoding we would like to determine. \
(default: stdin)',
type=argparse.FileType('rb'), nargs='*',
default=[sys.stdin if PY2 else sys.stdin.buffer])
parser.add_argument('--version', action='version',
version='%(prog)s {0}'.format(__version__))
args = parser.parse_args(argv)
for f in args.input:
if f.isatty():
print("You are running chardetect interactively. Press " +
"CTRL-D twice at the start of a blank line to signal the " +
"end of your input. If you want help, run chardetect " +
"--help\n", file=sys.stderr)
print(description_of(f, f.name))
if __name__ == '__main__':
main()
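The same detection loop that description_of() wraps can also be driven directly from Python. A minimal sketch, assuming the upstream chardet package; 'some_file.txt' is a placeholder path:
# Sketch mirroring description_of() above; 'some_file.txt' is a placeholder.
from chardet.universaldetector import UniversalDetector

detector = UniversalDetector()
with open("some_file.txt", "rb") as handle:
    for line in handle:
        detector.feed(line)
        if detector.done:   # stop early once the detector is confident
            break
detector.close()
print(detector.result)      # e.g. {'encoding': 'utf-8', 'confidence': 0.99, ...}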

View file

@@ -1,88 +0,0 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import logging
from .enums import MachineState
class CodingStateMachine(object):
"""
A state machine to verify a byte sequence for a particular encoding. For
each byte the detector receives, it will feed that byte to every active
state machine available, one byte at a time. The state machine changes its
state based on its previous state and the byte it receives. There are 3
states in a state machine that are of interest to an auto-detector:
START state: This is the state to start with, or a legal byte sequence
(i.e. a valid code point) for a character has been identified.
ME state: This indicates that the state machine identified a byte sequence
that is specific to the charset it is designed for and that
there is no other possible encoding which can contain this byte
sequence. This will lead to an immediate positive answer for
the detector.
ERROR state: This indicates the state machine identified an illegal byte
sequence for that encoding. This will lead to an immediate
negative answer for this encoding. Detector will exclude this
encoding from consideration from here on.
"""
def __init__(self, sm):
self._model = sm
self._curr_byte_pos = 0
self._curr_char_len = 0
self._curr_state = None
self.logger = logging.getLogger(__name__)
self.reset()
def reset(self):
self._curr_state = MachineState.START
def next_state(self, c):
# for each byte we get its class
# if it is first byte, we also get byte length
byte_class = self._model['class_table'][c]
if self._curr_state == MachineState.START:
self._curr_byte_pos = 0
self._curr_char_len = self._model['char_len_table'][byte_class]
# from byte's class and state_table, we get its next state
curr_state = (self._curr_state * self._model['class_factor']
+ byte_class)
self._curr_state = self._model['state_table'][curr_state]
self._curr_byte_pos += 1
return self._curr_state
def get_current_charlen(self):
return self._curr_char_len
def get_coding_state_machine(self):
return self._model['name']
@property
def language(self):
return self._model['language']
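A state machine is driven one byte at a time through next_state(), with the next state looked up at index current_state * class_factor + byte_class. A sketch using the ISO-2022-JP model defined further down in this diff, assuming the upstream chardet package; the three bytes are the ESC $ B shift sequence:
# Sketch, assuming upstream chardet; ESC $ B is an ISO-2022-JP escape sequence.
from chardet.codingstatemachine import CodingStateMachine
from chardet.enums import MachineState
from chardet.escsm import ISO2022JP_SM_MODEL

sm = CodingStateMachine(ISO2022JP_SM_MODEL)
state = None
for byte in bytearray(b"\x1b$B"):
    state = sm.next_state(byte)
print(state == MachineState.ITS_ME)  # expected True: the sequence is ISO-2022-JP specific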

View file

@@ -1,34 +0,0 @@
######################## BEGIN LICENSE BLOCK ########################
# Contributor(s):
# Dan Blanchard
# Ian Cordasco
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
import sys
if sys.version_info < (3, 0):
PY2 = True
PY3 = False
base_str = (str, unicode)
text_type = unicode
else:
PY2 = False
PY3 = True
base_str = (bytes, str)
text_type = str

View file

@@ -1,49 +0,0 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .chardistribution import EUCKRDistributionAnalysis
from .codingstatemachine import CodingStateMachine
from .mbcharsetprober import MultiByteCharSetProber
from .mbcssm import CP949_SM_MODEL
class CP949Prober(MultiByteCharSetProber):
def __init__(self):
super(CP949Prober, self).__init__()
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
# not different.
self.distribution_analyzer = EUCKRDistributionAnalysis()
self.reset()
@property
def charset_name(self):
return "CP949"
@property
def language(self):
return "Korean"

View file

@@ -1,76 +0,0 @@
"""
All of the Enums that are used throughout the chardet package.
:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
class InputState(object):
"""
This enum represents the different states a universal detector can be in.
"""
PURE_ASCII = 0
ESC_ASCII = 1
HIGH_BYTE = 2
class LanguageFilter(object):
"""
This enum represents the different language filters we can apply to a
``UniversalDetector``.
"""
CHINESE_SIMPLIFIED = 0x01
CHINESE_TRADITIONAL = 0x02
JAPANESE = 0x04
KOREAN = 0x08
NON_CJK = 0x10
ALL = 0x1F
CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
CJK = CHINESE | JAPANESE | KOREAN
class ProbingState(object):
"""
This enum represents the different states a prober can be in.
"""
DETECTING = 0
FOUND_IT = 1
NOT_ME = 2
class MachineState(object):
"""
This enum represents the different states a state machine can be in.
"""
START = 0
ERROR = 1
ITS_ME = 2
class SequenceLikelihood(object):
"""
This enum represents the likelihood of a character following the previous one.
"""
NEGATIVE = 0
UNLIKELY = 1
LIKELY = 2
POSITIVE = 3
@classmethod
def get_num_categories(cls):
""":returns: The number of likelihood categories in the enum."""
return 4
class CharacterCategory(object):
"""
This enum represents the different categories language models for
``SingleByteCharsetProber`` put characters into.
Anything less than CONTROL is considered a letter.
"""
UNDEFINED = 255
LINE_BREAK = 254
SYMBOL = 253
DIGIT = 252
CONTROL = 251
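LanguageFilter values are bit flags: they are combined with | and tested with &, which is how probers decide which state machines to activate. A two-line sketch, assuming the upstream chardet package:
# Sketch, assuming upstream chardet.
from chardet.enums import LanguageFilter

wanted = LanguageFilter.CHINESE | LanguageFilter.JAPANESE
print(bool(wanted & LanguageFilter.JAPANESE))  # True
print(bool(wanted & LanguageFilter.KOREAN))    # False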

View file

@@ -1,101 +0,0 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
from .enums import LanguageFilter, ProbingState, MachineState
from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,
ISO2022KR_SM_MODEL)
class EscCharSetProber(CharSetProber):
"""
This CharSetProber uses a "code scheme" approach for detecting encodings,
whereby easily recognizable escape or shift sequences are relied on to
identify these encodings.
"""
def __init__(self, lang_filter=None):
super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
self.coding_sm = []
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
if self.lang_filter & LanguageFilter.JAPANESE:
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
if self.lang_filter & LanguageFilter.KOREAN:
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
self.active_sm_count = None
self._detected_charset = None
self._detected_language = None
self._state = None
self.reset()
def reset(self):
super(EscCharSetProber, self).reset()
for coding_sm in self.coding_sm:
if not coding_sm:
continue
coding_sm.active = True
coding_sm.reset()
self.active_sm_count = len(self.coding_sm)
self._detected_charset = None
self._detected_language = None
@property
def charset_name(self):
return self._detected_charset
@property
def language(self):
return self._detected_language
def get_confidence(self):
if self._detected_charset:
return 0.99
else:
return 0.00
def feed(self, byte_str):
for c in byte_str:
for coding_sm in self.coding_sm:
if not coding_sm or not coding_sm.active:
continue
coding_state = coding_sm.next_state(c)
if coding_state == MachineState.ERROR:
coding_sm.active = False
self.active_sm_count -= 1
if self.active_sm_count <= 0:
self._state = ProbingState.NOT_ME
return self.state
elif coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
self._detected_charset = coding_sm.get_coding_state_machine()
self._detected_language = coding_sm.language
return self.state
return self.state
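Putting the pieces together, EscCharSetProber runs every escape state machine in parallel and stops at the first ITS_ME. A sketch, assuming the upstream chardet package; LanguageFilter.ALL is passed explicitly because the constructor tests lang_filter with &, and the byte string is an arbitrary ISO-2022-JP-style example:
# Sketch, assuming upstream chardet; the bytes are an arbitrary example
# starting with the ISO-2022-JP escape sequence ESC $ B.
from chardet.enums import LanguageFilter
from chardet.escprober import EscCharSetProber

prober = EscCharSetProber(lang_filter=LanguageFilter.ALL)
prober.feed(b"\x1b$B@0JG\x1b(B")
print(prober.charset_name)      # expected "ISO-2022-JP"
print(prober.get_confidence())  # 0.99 once a charset has been detected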

View file

@@ -1,246 +0,0 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .enums import MachineState
HZ_CLS = (
1,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,0,0,0,0, # 20 - 27
0,0,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,0,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,4,0,5,2,0, # 78 - 7f
1,1,1,1,1,1,1,1, # 80 - 87
1,1,1,1,1,1,1,1, # 88 - 8f
1,1,1,1,1,1,1,1, # 90 - 97
1,1,1,1,1,1,1,1, # 98 - 9f
1,1,1,1,1,1,1,1, # a0 - a7
1,1,1,1,1,1,1,1, # a8 - af
1,1,1,1,1,1,1,1, # b0 - b7
1,1,1,1,1,1,1,1, # b8 - bf
1,1,1,1,1,1,1,1, # c0 - c7
1,1,1,1,1,1,1,1, # c8 - cf
1,1,1,1,1,1,1,1, # d0 - d7
1,1,1,1,1,1,1,1, # d8 - df
1,1,1,1,1,1,1,1, # e0 - e7
1,1,1,1,1,1,1,1, # e8 - ef
1,1,1,1,1,1,1,1, # f0 - f7
1,1,1,1,1,1,1,1, # f8 - ff
)
HZ_ST = (
MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17
5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f
4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27
4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f
)
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
HZ_SM_MODEL = {'class_table': HZ_CLS,
'class_factor': 6,
'state_table': HZ_ST,
'char_len_table': HZ_CHAR_LEN_TABLE,
'name': "HZ-GB-2312",
'language': 'Chinese'}
ISO2022CN_CLS = (
2,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,0,0,0,0, # 20 - 27
0,3,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,4,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
2,2,2,2,2,2,2,2, # 80 - 87
2,2,2,2,2,2,2,2, # 88 - 8f
2,2,2,2,2,2,2,2, # 90 - 97
2,2,2,2,2,2,2,2, # 98 - 9f
2,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
2,2,2,2,2,2,2,2, # e0 - e7
2,2,2,2,2,2,2,2, # e8 - ef
2,2,2,2,2,2,2,2, # f0 - f7
2,2,2,2,2,2,2,2, # f8 - ff
)
ISO2022CN_ST = (
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27
5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f
)
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS,
'class_factor': 9,
'state_table': ISO2022CN_ST,
'char_len_table': ISO2022CN_CHAR_LEN_TABLE,
'name': "ISO-2022-CN",
'language': 'Chinese'}
ISO2022JP_CLS = (
2,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,2,2, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,7,0,0,0, # 20 - 27
3,0,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
6,0,4,0,8,0,0,0, # 40 - 47
0,9,5,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
2,2,2,2,2,2,2,2, # 80 - 87
2,2,2,2,2,2,2,2, # 88 - 8f
2,2,2,2,2,2,2,2, # 90 - 97
2,2,2,2,2,2,2,2, # 98 - 9f
2,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
2,2,2,2,2,2,2,2, # e0 - e7
2,2,2,2,2,2,2,2, # e8 - ef
2,2,2,2,2,2,2,2, # f0 - f7
2,2,2,2,2,2,2,2, # f8 - ff
)
ISO2022JP_ST = (
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f
MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47
)
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS,
'class_factor': 10,
'state_table': ISO2022JP_ST,
'char_len_table': ISO2022JP_CHAR_LEN_TABLE,
'name': "ISO-2022-JP",
'language': 'Japanese'}
ISO2022KR_CLS = (
2,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,3,0,0,0, # 20 - 27
0,4,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,5,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
2,2,2,2,2,2,2,2, # 80 - 87
2,2,2,2,2,2,2,2, # 88 - 8f
2,2,2,2,2,2,2,2, # 90 - 97
2,2,2,2,2,2,2,2, # 98 - 9f
2,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
2,2,2,2,2,2,2,2, # e0 - e7
2,2,2,2,2,2,2,2, # e8 - ef
2,2,2,2,2,2,2,2, # f0 - f7
2,2,2,2,2,2,2,2, # f8 - ff
)
ISO2022KR_ST = (
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27
)
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS,
'class_factor': 6,
'state_table': ISO2022KR_ST,
'char_len_table': ISO2022KR_CHAR_LEN_TABLE,
'name': "ISO-2022-KR",
'language': 'Korean'}

Some files were not shown because too many files have changed in this diff.