package com.walker.semantics; /** * 描述:词性枚举定义(part of speech) * @author 时克英 * @date 2020年9月25日 下午5:39:35 */ public enum SpeechPart { A_ADJECTIVE{ public boolean isFocus(){return true;} }, // 形容词,在时间抽取中使用了关键词(近、最近) B_DISTIN{ public boolean isFocus(){return true;} }, // 区别词,如:大号、彩色等,与形容词类似。对一些机器学习有用,但目前交互并不需要 C_CONJUNCTION{ public boolean isFocus(){return false;} }, // 连词,如:既然、由于等 D_ADVERB{ public boolean isFocus(){return false;} }, // 副词,如:永远、急忙等 E_INTERJECTION{ public boolean isFocus(){return false;} }, // 连词,如:就是、只是等 F_POSITION{ public boolean isFocus(){return false;} }, // 方位词,如:在...前面、底下等 M_NUMBER{ public boolean isFocus(){return true;} }, // 数词,如:几、12345等 MQ_NUMBER_UNIT{ public boolean isFocus(){return true;} }, // 数词带单位,如:一辆汽车等 N_NAME{ public boolean isFocus(){return true;} }, // 名词 NS_PLACENAME{ public boolean isFocus(){return true;} }, // 地名词,如:黄河、故宫等 NT_NAMETIME{ public boolean isFocus(){return false;} }, // 该词性无法判断,可以忽略。实际测试中,如:你的 NZ_NAMESTATUS{ public boolean isFocus(){return true;} }, // 状态名词,如:走错、高票 // 2021-11-16 发现第一页也是nz O_ANO{ public boolean isFocus(){return false;} }, // 拟声词,如:咕咚、叮咚等 P_PREPOSITION{ public boolean isFocus(){return true;} }, // 介词,如:对于、通过等,因为要判断时间范围(从...到)所以还是要关注介词 Q_QUANTIFIER{ public boolean isFocus(){return true;} }, // 量词,如:号、个 QV{ public boolean isFocus(){return true;} // 量词:次(2021-09-16) }, R_PRONOUN{ public boolean isFocus(){return true;} }, // 代词,如:你、我、他们、那、什么 RYS{ public boolean isFocus(){return true;} }, // 哪里,这个词出现rys词性,可能是代词、方位词合体 S_PLACE{ public boolean isFocus(){return true;} }, // 处所词,如:泰山、中山(ns)公园 T_TIME{ public boolean isFocus(){return true;} }, // 时间词,如:**后、明天 /*QT_TIME_UNIT{ public boolean isFocus(){return true;} }, // 时间单位词,如:分钟、(30)秒 */ U_AUXILIARY{ public boolean isFocus(){return false;} }, // 助词,如:了、的 V_VERB{ public boolean isFocus(){return true;} }, // 动词 // VI { // public boolean isFocus(){return true;} // }, // 动词,【出发】中出现 VN_VERBNAME{ public boolean isFocus(){return true;} }, // 动名词 Y_STATEMENT{ public boolean isFocus(){return false;} }, // 语气词,如:呢、吗、吧等 Z_STATUS{ public boolean isFocus(){return false;} }, // 状态词,如:冰凉、鲜红(颜色都是)等 W_PUNCTUATION{ public boolean isFocus(){return false;} }, // 标点符号 EN_ENGLISH{ public boolean isFocus(){return true;} }, // 英文 X_UNKNOWN{ public boolean isFocus(){return false;} }, // 无法识别的字符串 MY_N{ public boolean isFocus(){return true;} }, // 自定义名词 MY_V{ public boolean isFocus(){return true;} }, // 自定义动词,通常用于指令集 MY_V_AUX{ public boolean isFocus(){return true;} }, // 自定义辅助词,指令辅助词,如:显示...分布,启动...推力(到达)等 MY_PLACE { public boolean isFocus(){return true;} }, // 自定义地点,通常用于目的地,如:某单位、饭店等 MY_ROAD{ public boolean isFocus(){return true;} }; // 道路专用类型,2021-09-01 /** * 该词性是否应被关注,对于大部分词性对解析并无作用,因此我们可以直接忽略。 * @return */ public boolean isFocus(){ throw new AbstractMethodError(); } public static SpeechPart toSpeechPart(String index){ if(index.equals(INDEX_A_ADJECTIVE)){ return A_ADJECTIVE; } else if(index.equals(INDEX_B_DISTIN)){ return B_DISTIN; } else if(index.equals(INDEX_C_CONJUNCTION) || index.equals("cc")){ // 连词,出现了:cc return C_CONJUNCTION; } else if(index.equals(INDEX_D_ADVERB)){ return D_ADVERB; } else if(index.equals(INDEX_E_INTERJECTION)){ return E_INTERJECTION; } else if(index.equals(INDEX_F_POSITION)){ return F_POSITION; } else if(index.equals(INDEX_M_NUMBER)){ return M_NUMBER; } else if(index.equals(INDEX_MQ_NUMBER_UNIT)){ return MQ_NUMBER_UNIT; // } else if(index.equals(INDEX_N_NAME) || index.equals("nis")){ } else if(index.startsWith(INDEX_N_NAME)){ // 给/p,我/rr,三个/mq,苹果/nf return N_NAME; } else if(index.equals(INDEX_NS_PLACENAME)){ return NS_PLACENAME; } else if(index.equals(INDEX_NZ_NAMESTATUS)){ return NZ_NAMESTATUS; } else if(index.equals(INDEX_NT_NAMETIME)){ return NT_NAMETIME; } else if(index.equals(INDEX_O_ANO)){ return O_ANO; } else if(index.equals(INDEX_P_PREPOSITION)){ return P_PREPOSITION; // } else if(index.equals(INDEX_R_PRONOUN) || index.equals("ry")){ } else if(index.equals(INDEX_Q_QUANTIFIER)){ // 量词,如:个、点钟 return Q_QUANTIFIER; } else if(index.equals(INDEX_QV)){ // 量词,如:个、点钟 return QV; } else if(index.startsWith(INDEX_R_PRONOUN)){ // 后天什么天气中,什么为ry return R_PRONOUN; } else if(index.equals(INDEX_RYS)){ return RYS; } else if(index.equals(INDEX_S_PLACE)){ return S_PLACE; } else if(index.equals(INDEX_T_TIME) || index.equals(INDEX_QT_TIME_UNIT)){ // 年、分钟等(qt) return T_TIME; // } else if(index.equals(INDEX_U_AUXILIARY) || index.equals(INDEX_U_DEL)){ } else if(index.startsWith(INDEX_U_AUXILIARY)){ return U_AUXILIARY; // } else if(index.equals(INDEX_V_VERB) || index.equals(INDEX_VI)){ } else if(index.startsWith(INDEX_V_VERB)){ return V_VERB; } else if(index.equals(INDEX_VN_VERBNAME)){ return VN_VERBNAME; } else if(index.equals(INDEX_Y_STATEMENT)){ return Y_STATEMENT; } else if(index.equals(INDEX_Z_STATUS)){ return Z_STATUS; } else if(index.equals(INDEX_W_PUNCTUATION)){ return W_PUNCTUATION; } else if(index.equals(INDEX_EN_ENGLISH)){ return EN_ENGLISH; } else if(index.equals(INDEX_X_UNKNOWN)){ return X_UNKNOWN; } else if(index.equals(INDEX_MY_N)){ return MY_N; } else if(index.equals(INDEX_MY_V)){ return MY_V; } else if(index.equals(INDEX_MY_V_AUX)){ return MY_V_AUX; } else if(index.equals(INDEX_MY_PLACE)){ return MY_PLACE; } else if(index.equals(INDEX_MY_ROAD)){ return MY_ROAD; } else { // throw new UnsupportedOperationException("未支持的词性:" + index); // 对不支持的词性,不再报错,直接设置成未知类型 System.out.println("未支持的词性:" + index); return X_UNKNOWN; } } public static final String INDEX_A_ADJECTIVE = "a"; public static final String INDEX_B_DISTIN = "b"; public static final String INDEX_C_CONJUNCTION = "c"; public static final String INDEX_D_ADVERB = "d"; public static final String INDEX_E_INTERJECTION = "e"; public static final String INDEX_F_POSITION = "f"; public static final String INDEX_M_NUMBER = "m"; public static final String INDEX_MQ_NUMBER_UNIT = "mq"; public static final String INDEX_N_NAME = "n"; public static final String INDEX_NS_PLACENAME = "ns"; public static final String INDEX_NZ_NAMESTATUS = "nz"; public static final String INDEX_NT_NAMETIME = "nt"; public static final String INDEX_O_ANO = "o"; public static final String INDEX_P_PREPOSITION = "p"; public static final String INDEX_Q_QUANTIFIER = "q"; public static final String INDEX_QV = "qv"; public static final String INDEX_R_PRONOUN = "r"; public static final String INDEX_RYS = "rys"; public static final String INDEX_S_PLACE = "s"; public static final String INDEX_T_TIME = "t"; public static final String INDEX_QT_TIME_UNIT= "qt"; public static final String INDEX_U_AUXILIARY = "u"; public static final String INDEX_U_DEL = "ude1"; public static final String INDEX_V_VERB = "v"; public static final String INDEX_VI = "vi"; public static final String INDEX_VN_VERBNAME = "vn"; public static final String INDEX_Y_STATEMENT = "y"; public static final String INDEX_Z_STATUS = "z"; public static final String INDEX_W_PUNCTUATION = "w"; public static final String INDEX_EN_ENGLISH = "en"; public static final String INDEX_X_UNKNOWN = "x"; public static final String INDEX_MY_N = "mn"; public static final String INDEX_MY_V = "mv"; public static final String INDEX_MY_V_AUX = "mv_aux"; public static final String INDEX_MY_PLACE = "mp"; public static final String INDEX_MY_ROAD = "mr"; }