Note: The data mining functions operate on models that have been built using the DBMS_DATA_MINING package or the Oracle Data Mining Java API.
|
|
CLUSTER_ID |
Returns the cluster identifier of the predicted cluster with the
highest probability for the set of predictors specified in the mining_attribute_clause
|
CLUSTER_ID(<schame.model> <mining_attribute_clause>) |
SELECT CLUSTER_ID(km_sh_clus_sample USING *) AS clus, COUNT(*) AS cnt
FROM km_sh_sample_apply_prepared
GROUP BY CLUSTER_ID(km_sh_clus_sample USING *)
ORDER BY cnt DESC;
|
|
CLUSTER_PROBABILITY |
Returns a measure of the degree of confidence of membership of an input row
in a cluster associated with the specified model.
|
CLUSTER_PROBABILITY(<schema.model>, <cluster_id> <mining_attribute_clause>)
|
SELECT *
FROM (
SELECT cust_id, CLUSTER_PROBABILITY(km_sh_clus_sample, 2 USING *)
prob
FROM km_sh_sample_apply_prepared
ORDER BY prob DESC)
WHERE ROWNUM < 11;
|
|
CLUSTER_SET |
Returns a varray of objects containing all possible clusters that a given row belongs to. Each object in the varray is a pair of scalar values containing the cluster ID and the cluster probability. The object fields are named CLUSTER_ID and PROBABILITY, and both are Oracle
NUMBER |
CLUSTER_SET(<schema.model>, <top N>, <cutoff>
<mining_attribute_clause>) |
WITH
clus_tab AS (SELECT id, A.attribute_name aname,
A.conditional_operator op, NVL(A.attribute_str_value,
ROUND(DECODE(A.attribute_name, N.col,
A.attribute_num_value * N.scale + N.shift,
A.attribute_num_value),4)) val,
A.attribute_support support,
A.attribute_confidence confidence
FROM TABLE(DBMS_DATA_MINING.GET_MODEL_DETAILS_KM('km_sh_clus_sample')) T,
TABLE(T.rule.antecedent) A,
km_sh_sample_norm N
WHERE A.attribute_name = N.col(+) AND A.attribute_confidence > 0.55),
clust AS (
SELECT id, CAST(COLLECT(Cattr(aname, op, TO_CHAR(val), support,
confidence)) AS Cattrs) cl_attrs
FROM clus_tab
GROUP BY id), custclus AS (
SELECT T.cust_id, S.cluster_id, S.probability
FROM (
SELECT cust_id, CLUSTER_SET(km_sh_clus_sample, NULL, 0.2 USING *)
pset
FROM km_sh_sample_apply_prepared
WHERE cust_id = 101362) T,
TABLE(T.pset) S)
SELECT A.probability prob, A.cluster_id cl_id, B.attr,
B.op, B.val,
B.supp, B.conf
FROM custclus A, (
SELECT T.id, C.*
FROM clust T, TABLE(T.cl_attrs) C) B
WHERE A.cluster_id = B.id
ORDER BY prob DESC, cl_id ASC, conf DESC, attr
ASC, val ASC; |
|
FEATURE_ID |
Returns an Oracle NUMBER that is the identifier of the feature with the highest coefficient
value |
FEATURE_ID(<schame.model> <mining_attribute_clause>) |
SELECT FEATURE_ID(nmf_sh_sample USING *) AS feat, COUNT(*) AS cnt
FROM nmf_sh_sample_apply_prepared
GROUP BY FEATURE_ID(nmf_sh_sample USING *)
ORDER BY cnt DESC; |
|
FEATURE_SET |
Returns a varray of objects containing all possible features. Each object in the varray is a pair of scalar values containing the feature ID and the feature value. The object fields are named FEATURE�_ID and VALUE, and both are Oracle
NUMBERs. |
FEATURE_SET(<schema.model>, <top N>,
<cutoff>
<mining_attribute_clause>) |
WITH
feat_tab AS (
SELECT F.feature_id fid,
A.attribute_name attr,
TO_CHAR(A.attribute_value) val,
A.coefficient coeff
FROM TABLE(DBMS_DATA_MINING.GET_MODEL_DETAILS_NMF('nmf_sh_sample')) F,
TABLE(F.attribute_set) A
WHERE A.coefficient > 0.25), feat AS (
SELECT fid,
CAST(COLLECT(Featattr(attr, val, coeff)) AS Featattrs) f_attrs
FROM feat_tab
GROUP BY fid
),
cust_10_features AS (
SELECT T.cust_id, S.feature_id, S.value
FROM (SELECT cust_id, FEATURE_SET(nmf_sh_sample, 10 USING *) pset
FROM nmf_sh_sample_apply_prepared
WHERE cust_id = 100002) T, TABLE(T.pset) S)
SELECT A.value, A.feature_id fid,
B.attr, B.val, B.coeff
FROM cust_10_features A,
(SELECT T.fid, F.*
FROM feat T, TABLE(T.f_attrs) F) B
WHERE A.feature_id = B.fid
ORDER BY A.value DESC, A.feature_id ASC, coeff DESC, attr ASC, val ASC; |
|
FEATURE_VALUE |
Returns the value of a given feature. If you omit the feature_id argument, then the function returns the highest feature value. You can use this form in conjunction with the FEATURE_ID function to obtain the largest feature/value
combo. |
FEATURE_VALUE(<schema.model>,<feature_id><mining_attribute_clause>) |
SELECT *
FROM (
SELECT cust_id, FEATURE_VALUE(nmf_sh_sample,3 USING *)
match_qual
FROM nmf_sh_sample_apply_prepared
ORDER BY match_quality DESC)
WHERE ROWNUM < 11; |
|
PREDICTION |
Returns the best prediction for the model. The datatype returned depends on the target value type used during the build of the model. For regression models, this function returns the expected value. |
PREDICTION(<schema.model>, <cost_matrix_clause>
<mining_attribute_clause>) |
SELECT cust_gender, COUNT(*) AS cnt, ROUND(AVG(age)) AS avg_age
FROM mining_data_apply_v
WHERE PREDICTION(DT_SH_Clas_sample COST MODEL
USING cust_marital_status, education, household_size) = 1
GROUP BY cust_gender
ORDER BY cust_gender; |
|
PREDICTION_BOUNDS
(new in 11g) |
Ffor use only with generalized linear models. It returns an object with two NUMBER fields LOWER and UPPER. |
PREDICTION_BOUNDS(<schema.model>, <confidence_level>,
<class_value>
USING <* | schema.table.* | expression AS alias> |
SELECT cust_marital_status,
COUNT(cust_id) AS CUST_COUNT
FROM (
SELECT cust_id, cust_marital_status
FROM mining_data_apply_v
WHERE PREDICTION_BOUNDS(glmr_sh_regr_sample,0.98 USING *).LOWER > 24
AND PREDICTION_BOUNDS(glmr_sh_regr_sample,0.98 USING *).UPPER < 46)
GROUP BY cust_marital_status; |
|
PREDICTION_COST |
Returns a measure of cost for a given prediction as an Oracle
NUMBER |
PREDICTION_COST(<schema.model>,
<class> <cost_matrix_clause>
<mining_attribute_clause>) |
WITH
cust_italy AS (
SELECT cust_id
FROM mining_data_apply_v
WHERE country_name = 'Italy'
ORDER BY PREDICTION_COST(DT_SH_Clas_sample,1 COST MODEL USING *)
ASC,1)
SELECT cust_id
FROM cust_italy
WHERE rownum < 11; |
|
PREDICTION_DETAILS |
Returns an XML string containing model-specific information related to the scoring of the input row
|
PREDICTION_DETAILS(<schema.model> <mining_attribute_clause>) |
SELECT cust_id, education,
PREDICTION_DETAILS(DT_SH_Clas_sample using *) treenode
FROM mining_data_apply_v
WHERE occupation = 'TechSup' AND age < 25
ORDER BY cust_id;
|
|
PREDICTION_PROBABILITY |
Returns the probability for a given prediction as an Oracle NUMBER |
PREDICTION_PROBABILITY(<schema.model>
<class>
<mining_attribute_clause>) |
SELECT cust_id
FROM (
SELECT cust_id
FROM mining_data_apply_v
WHERE country_name = 'Italy'
ORDER BY PREDICTION_PROBABILITY(DT_SH_Clas_sample, 1 USING *)
DESC, cust_id)
WHERE rownum < 11; |
|
PREDICTION_SET |
Returns a varray of objects containing all classes in a multiclass classification scenario |
PREDICTION_SET(<schema.model>, <best
N>, <cutoff>
<cost_matrix_clause> <mining_attribute_clause>) |
SELECT T.cust_id, S.prediction, S.probability, S.cost
FROM (
SELECT cust_id,
PREDICTION_SET(dt_sh_clas_sample COST MODEL USING *) pset
FROM mining_data_apply_v
WHERE cust_id < 100011) T,
TABLE(T.pset) S
ORDER BY cust_id, S.prediction; |