package com.walker.semantics;
|
|
/**
 * Part-of-speech categories used by the semantic parser, together with the
 * mapping from raw segmenter tag strings (for example {@code "ns"} or
 * {@code "ude1"}) to those categories.
 *
 * <p>Each constant carries a "focus" flag telling the parser whether words of
 * that category are worth looking at or can be skipped.
 *
 * @author 时克英
 * @date 2020-09-25 17:39:35
 */
public enum SpeechPart {

    /** Adjective. Time extraction relies on adjective keywords such as 近, 最近. */
    A_ADJECTIVE(true),

    /**
     * Distinguishing word, e.g. 大号, 彩色; similar to an adjective. Noted as useful
     * for some machine-learning uses but not currently needed for interaction.
     */
    B_DISTIN(true),

    /** Conjunction, e.g. 既然, 由于. */
    C_CONJUNCTION(false),

    /** Adverb, e.g. 永远, 急忙. */
    D_ADVERB(false),

    /**
     * Tag "e". The original note describes these as conjunction-like words, e.g.
     * 就是, 只是. NOTE(review): the constant name says "interjection" - confirm
     * which description is right.
     */
    E_INTERJECTION(false),

    /** Position word, e.g. 在...前面, 底下. */
    F_POSITION(false),

    /** Numeral, e.g. 几, 12345. */
    M_NUMBER(true),

    /** Numeral with a unit, e.g. 一辆汽车. */
    MQ_NUMBER_UNIT(true),

    /** Noun. */
    N_NAME(true),

    /** Place name, e.g. 黄河, 故宫. */
    NS_PLACENAME(true),

    /** Tag "nt". Cannot be classified reliably and may be ignored; seen in testing for e.g. 你的. */
    NT_NAMETIME(false),

    /** State noun, e.g. 走错, 高票. On 2021-11-16 the phrase 第一页 was also seen tagged "nz". */
    NZ_NAMESTATUS(true),

    /** Onomatopoeia, e.g. 咕咚, 叮咚. */
    O_ANO(false),

    /** Preposition, e.g. 对于, 通过. Kept in focus because time ranges (从...到) depend on it. */
    P_PREPOSITION(true),

    /** Measure word, e.g. 号, 个. */
    Q_QUANTIFIER(true),

    /** Measure word 次 (added 2021-09-16). */
    QV(true),

    /** Pronoun, e.g. 你, 我, 他们, 那, 什么. */
    R_PRONOUN(true),

    /** Tag "rys", seen for 哪里; possibly a pronoun and position word combined. */
    RYS(true),

    /** Place word, e.g. 泰山, 中山(ns)公园. */
    S_PLACE(true),

    /** Time word, e.g. **后, 明天. Time-unit tags ("qt", e.g. 分钟, 秒) are mapped here as well. */
    T_TIME(true),

    /** Auxiliary word, e.g. 了, 的. */
    U_AUXILIARY(false),

    /** Verb. Verb sub-tags such as "vi" (seen for 出发) are mapped here as well. */
    V_VERB(true),

    /** Verbal noun. */
    VN_VERBNAME(true),

    /** Modal particle, e.g. 呢, 吗, 吧. */
    Y_STATEMENT(false),

    /** State word, e.g. 冰凉, 鲜红 (colour words all fall here). */
    Z_STATUS(false),

    /** Punctuation. */
    W_PUNCTUATION(false),

    /** English text. */
    EN_ENGLISH(true),

    /** Unrecognisable string; also the fallback for unsupported tags. */
    X_UNKNOWN(false),

    /** Custom noun. */
    MY_N(true),

    /** Custom verb, typically used for command sets. */
    MY_V(true),

    /** Custom command auxiliary word, e.g. 显示...分布, 启动...推力(到达). */
    MY_V_AUX(true),

    /** Custom place, typically a destination such as an organisation or a restaurant. */
    MY_PLACE(true),

    /** Road-specific type (added 2021-09-01). */
    MY_ROAD(true);

    public static final String INDEX_A_ADJECTIVE = "a";
    public static final String INDEX_B_DISTIN = "b";
    public static final String INDEX_C_CONJUNCTION = "c";
    public static final String INDEX_D_ADVERB = "d";
    public static final String INDEX_E_INTERJECTION = "e";
    public static final String INDEX_F_POSITION = "f";
    public static final String INDEX_M_NUMBER = "m";
    public static final String INDEX_MQ_NUMBER_UNIT = "mq";
    public static final String INDEX_N_NAME = "n";
    public static final String INDEX_NS_PLACENAME = "ns";
    public static final String INDEX_NZ_NAMESTATUS = "nz";
    public static final String INDEX_NT_NAMETIME = "nt";
    public static final String INDEX_O_ANO = "o";
    public static final String INDEX_P_PREPOSITION = "p";
    public static final String INDEX_Q_QUANTIFIER = "q";
    public static final String INDEX_QV = "qv";
    public static final String INDEX_R_PRONOUN = "r";
    public static final String INDEX_RYS = "rys";
    public static final String INDEX_S_PLACE = "s";
    public static final String INDEX_T_TIME = "t";
    public static final String INDEX_QT_TIME_UNIT = "qt";
    public static final String INDEX_U_AUXILIARY = "u";
    public static final String INDEX_U_DEL = "ude1";
    public static final String INDEX_V_VERB = "v";
    public static final String INDEX_VI = "vi";
    public static final String INDEX_VN_VERBNAME = "vn";
    public static final String INDEX_Y_STATEMENT = "y";
    public static final String INDEX_Z_STATUS = "z";
    public static final String INDEX_W_PUNCTUATION = "w";
    public static final String INDEX_EN_ENGLISH = "en";
    public static final String INDEX_X_UNKNOWN = "x";
    public static final String INDEX_MY_N = "mn";
    public static final String INDEX_MY_V = "mv";
    public static final String INDEX_MY_V_AUX = "mv_aux";
    public static final String INDEX_MY_PLACE = "mp";
    public static final String INDEX_MY_ROAD = "mr";

    /** Whether the parser should pay attention to words of this category. */
    private final boolean focus;

    SpeechPart(boolean focus) {
        this.focus = focus;
    }

    /**
     * Tells whether this part of speech should be paid attention to. Most parts
     * of speech contribute nothing to parsing and can simply be ignored.
     *
     * @return {@code true} if words of this category are relevant to parsing
     */
    public boolean isFocus() {
        return focus;
    }

    /**
     * Maps a raw part-of-speech tag to its {@code SpeechPart}.
     *
     * <p>Exact tags are resolved first. Only when no exact tag matches are the
     * prefix families consulted ({@code n*}, {@code r*}, {@code u*}, {@code v*}),
     * so that specific tags such as {@code "ns"}, {@code "nz"}, {@code "nt"},
     * {@code "rys"} and {@code "vn"} are not swallowed by their one-letter family.
     *
     * <p>Unsupported tags, including {@code null}, do not raise an error: a
     * message is printed to standard output and {@link #X_UNKNOWN} is returned.
     *
     * @param index the tag string, e.g. {@code "a"}, {@code "ns"}, {@code "ude1"}
     * @return the matching category, or {@link #X_UNKNOWN} if the tag is not supported
     */
    public static SpeechPart toSpeechPart(String index) {
        if (index != null) {
            SpeechPart exact = matchExactTag(index);
            if (exact != null) {
                return exact;
            }
            SpeechPart family = matchTagFamily(index);
            if (family != null) {
                return family;
            }
        }
        // Unsupported tags are deliberately not an error; they degrade to X_UNKNOWN.
        System.out.println("未支持的词性:" + index);
        return X_UNKNOWN;
    }

    /** Resolves tags that must match exactly; returns {@code null} when none does. */
    private static SpeechPart matchExactTag(String index) {
        switch (index) {
            case INDEX_A_ADJECTIVE:
                return A_ADJECTIVE;
            case INDEX_B_DISTIN:
                return B_DISTIN;
            case INDEX_C_CONJUNCTION:
            case "cc": // conjunctions were also seen tagged "cc"
                return C_CONJUNCTION;
            case INDEX_D_ADVERB:
                return D_ADVERB;
            case INDEX_E_INTERJECTION:
                return E_INTERJECTION;
            case INDEX_F_POSITION:
                return F_POSITION;
            case INDEX_M_NUMBER:
                return M_NUMBER;
            case INDEX_MQ_NUMBER_UNIT:
                return MQ_NUMBER_UNIT;
            case INDEX_NS_PLACENAME:
                return NS_PLACENAME;
            case INDEX_NZ_NAMESTATUS:
                return NZ_NAMESTATUS;
            case INDEX_NT_NAMETIME:
                return NT_NAMETIME;
            case INDEX_O_ANO:
                return O_ANO;
            case INDEX_P_PREPOSITION:
                return P_PREPOSITION;
            case INDEX_Q_QUANTIFIER:
                return Q_QUANTIFIER;
            case INDEX_QV:
                return QV;
            case INDEX_RYS:
                return RYS;
            case INDEX_S_PLACE:
                return S_PLACE;
            case INDEX_T_TIME:
            case INDEX_QT_TIME_UNIT: // time units such as 年, 分钟 arrive tagged "qt"
                return T_TIME;
            case INDEX_VN_VERBNAME:
                return VN_VERBNAME;
            case INDEX_Y_STATEMENT:
                return Y_STATEMENT;
            case INDEX_Z_STATUS:
                return Z_STATUS;
            case INDEX_W_PUNCTUATION:
                return W_PUNCTUATION;
            case INDEX_EN_ENGLISH:
                return EN_ENGLISH;
            case INDEX_X_UNKNOWN:
                return X_UNKNOWN;
            case INDEX_MY_N:
                return MY_N;
            case INDEX_MY_V:
                return MY_V;
            case INDEX_MY_V_AUX:
                return MY_V_AUX;
            case INDEX_MY_PLACE:
                return MY_PLACE;
            case INDEX_MY_ROAD:
                return MY_ROAD;
            default:
                return null;
        }
    }

    /** Resolves tags by their one-letter family prefix; returns {@code null} when none applies. */
    private static SpeechPart matchTagFamily(String index) {
        if (index.startsWith(INDEX_N_NAME)) {
            // Noun sub-tags, e.g. 苹果/nf in "给/p,我/rr,三个/mq,苹果/nf".
            return N_NAME;
        }
        if (index.startsWith(INDEX_R_PRONOUN)) {
            // Pronoun sub-tags, e.g. 什么 is tagged "ry" in 后天什么天气.
            return R_PRONOUN;
        }
        if (index.startsWith(INDEX_U_AUXILIARY)) {
            // Auxiliary sub-tags, e.g. "ude1".
            return U_AUXILIARY;
        }
        if (index.startsWith(INDEX_V_VERB)) {
            // Verb sub-tags, e.g. "vi".
            return V_VERB;
        }
        return null;
    }

}
|