Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ jieba
```python
# encoding=utf-8
import jieba

seg_list = jieba.cut("我来到北京清华大学", use_paddle=True)
print("Paddle Mode: " + "/ ".join(seg_list)) # paddle模式, 0.40版之后开始支持,早期版本不支持
jieba.enable_paddle()# 启动paddle模式, 一次启动后后续可以重复使用。 0.40版之后开始支持,早期版本不支持
seg_list = jieba.cut("我来到北京清华大学", use_paddle=True)# 使用paddle模式
print("Paddle Mode: " + "/ ".join(seg_list))

seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
print("Full Mode: " + "/ ".join(seg_list)) # 全模式
Expand Down Expand Up @@ -196,7 +196,11 @@ https://github.com/fxsjy/jieba/blob/master/test/extract_tags.py
```pycon
>>> import jieba.posseg as pseg
>>> words = pseg.cut("我爱北京天安门") #jieba默认模式
>>> words = pseg.cut("我爱北京天安门",use_paddle=True) #paddle模式

>>> import jieba
>>> jieba.enable_paddle()# 启动paddle模式, 一次启动后后续可以重复使用。 0.40版之后开始支持,早期版本不支持
>>> words = pseg.cut("我爱北京天安门",use_paddle=True) #使用paddle模式

>>> for word, flag in words:
... print('%s %s' % (word, flag))
...
Expand Down
29 changes: 18 additions & 11 deletions jieba/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@
re_han_default = re.compile("([\u4E00-\u9FD5a-zA-Z0-9+#&\._%\-]+)", re.U)

re_skip_default = re.compile("(\r\n|\s)", re.U)
re_han_cut_all = re.compile("([\u4E00-\u9FD5]+)", re.U)
re_skip_cut_all = re.compile("[^a-zA-Z0-9+#\n]", re.U)

def setLogLevel(log_level):
global logger
Expand Down Expand Up @@ -200,15 +198,29 @@ def get_DAG(self, sentence):
def __cut_all(self, sentence):
dag = self.get_DAG(sentence)
old_j = -1
eng_scan = 0
eng_buf = u''
for k, L in iteritems(dag):
if eng_scan==1 and not re_eng.match(sentence[k]):
eng_scan = 0
yield eng_buf
if len(L) == 1 and k > old_j:
yield sentence[k:L[0] + 1]
if re_eng.match(sentence[k]):
if eng_scan == 0:
eng_scan = 1
eng_buf = sentence[k]
else:
eng_buf += sentence[k]
if eng_scan == 0:
yield sentence[k:L[0] + 1]
old_j = L[0]
else:
for j in L:
if j > k:
yield sentence[k:j + 1]
old_j = j
if eng_scan==1:
yield eng_buf

def __cut_DAG_NO_HMM(self, sentence):
DAG = self.get_DAG(sentence)
Expand Down Expand Up @@ -285,10 +297,9 @@ def cut(self, sentence, cut_all = False, HMM = True,use_paddle = False):
'''
is_paddle_installed = False
if use_paddle == True:
import_paddle_check = import_paddle()
is_paddle_installed = check_paddle_install()
sentence = strdecode(sentence)
if use_paddle == True and is_paddle_installed == True and import_paddle_check == True:
if use_paddle == True and is_paddle_installed == True:
if sentence is None or sentence == "" or sentence == u"":
yield sentence
return
Expand All @@ -299,12 +310,8 @@ def cut(self, sentence, cut_all = False, HMM = True,use_paddle = False):
continue
yield sent
return
if cut_all:
re_han = re_han_cut_all
re_skip = re_skip_cut_all
else:
re_han = re_han_default
re_skip = re_skip_default
re_han = re_han_default
re_skip = re_skip_default
if cut_all:
cut_block = self.__cut_all
elif HMM:
Expand Down
36 changes: 20 additions & 16 deletions jieba/_compat.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
import os
import sys
import importlib
import logging

log_console = logging.StreamHandler(sys.stderr)
Expand All @@ -21,23 +20,32 @@ def setLogLevel(log_level):
os.getcwd(), os.path.dirname(__file__), *res)), 'rb')


def import_paddle():
def enable_paddle():
import_paddle_check = False
try:
import paddle
except ImportError:
default_logger.debug("Import paddle error, please use command to install: pip install paddlepaddle-tiny==1.6.1. "
default_logger.debug("Install paddle-tiny, please waite a minute......")
os.system("pip install paddlepaddle-tiny")
try:
import paddle
except ImportError:
default_logger.debug("Import paddle error, please use command to install: pip install paddlepaddle-tiny==1.6.1."
"Now, back to jieba basic cut......")
return False
try:
if paddle.__version__ >= '1.6.1' or paddle.__version__ >= u'1.6.1':
if paddle.__version__ < '1.6.1':
default_logger.debug("Find your paddle version is not correct, please use command to upgrade: "
"pip install --upgrade paddlepaddle-tiny or pip install --upgrade paddlepaddle ")
else:
try:
import paddle.fluid as fluid
import jieba.lac_small.predict as predict
import_paddle_check = True
except ImportError:
default_logger.debug("Import error, cannot find paddle.fluid and jieba.lac_small.predict module. "
default_logger.debug("Paddle enable successfully......")
except ImportError:
default_logger.debug("Import error, cannot find paddle.fluid and jieba.lac_small.predict module. "
"Now, back to jieba basic cut......")
return False
return False
return import_paddle_check


Expand Down Expand Up @@ -81,14 +89,10 @@ def check_paddle_install():
is_paddle_installed = False
try:
import paddle
if importlib.find_module('paddle') and (paddle.__version__ >= '1.6.1' or paddle.__version__ >= u'1.6.1'):
is_paddle_installed = True
elif paddle.__version__ < '1.6.1':
is_paddle_installed = False
default_logger.debug("Check the paddle version is not correct, the current version is "+ paddle.__version__+","
"please use command to install paddle: pip uninstall paddlepaddle(-gpu), "
"pip install paddlepaddle-tiny==1.6.1. Now, back to jieba basic cut......")
import paddle.fluid as fluid
import jieba.lac_small.predict as predict
is_paddle_installed = True
except ImportError:
default_logger.debug("Import paddle error, back to jieba basic cut......")
default_logger.debug("Import error,please use enable_paddle() to enable paddle. Back to jieba basic cut......")
is_paddle_installed = False
return is_paddle_installed
3 changes: 1 addition & 2 deletions jieba/posseg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,9 +278,8 @@ def cut(sentence, HMM=True, use_paddle=False):
"""
is_paddle_installed = False
if use_paddle == True:
import_paddle_check = import_paddle()
is_paddle_installed = check_paddle_install()
if use_paddle==True and is_paddle_installed == True and import_paddle_check == True:
if use_paddle==True and is_paddle_installed == True:
if sentence is None or sentence == "" or sentence == u"":
yield pair(None, None)
return
Expand Down