请帮忙改进以下算法
时间:2011-12-20
来源:互联网
算法是计算两个向量的Cosine距离.
/**
 * Calculates the correlation between two terms from their sparse document vectors.
 *
 * <p>Each term's vector is looked up in {@code termVectors}. A vector is a list of
 * (docId, weight) pairs, e.g. the serialized form
 * {@code "4-0.009662 30-0.002465 60-0.000822 75-0.003846"}.
 *
 * <p>Precondition: the {@code docIds} of both vectors are sorted in ascending order.
 * The single-pass merge below relies on that ordering; it also assumes a docId
 * appears at most once per vector (TODO confirm against the code that builds the vectors).
 *
 * @param term1 index of the first term in {@code termVectors}
 * @param term2 index of the second term in {@code termVectors}
 * @return {@code sqrt(dot) / (t1.sqrtSumWeight * t2.sqrtSumWeight)}, where {@code dot}
 *         is the sum of {@code weight1 * weight2} over all docIds present in both vectors
 */
public static double CalcCorr(int term1, int term2)
{
    TermVector t1 = termVectors[term1];
    TermVector t2 = termVectors[term2];

    double dSum = 0;
    int i = 0;
    int j = 0;
    /* Merge-style walk over both sorted docId arrays: each element is visited once,
       so the cost is O(t1.size + t2.size) instead of rescanning t2 for every entry of t1. */
    while (i < t1.size && j < t2.size)
    {
        int id1 = t1.docIds[i];
        int id2 = t2.docIds[j];
        if (id1 == id2)
        {
            /* Shared document: accumulate weight_i * weight_j and advance both sides. */
            dSum += t1.weights[i] * t2.weights[j];
            i++;
            j++;
        }
        else if (id1 < id2)
        {
            /* t1 is behind; its current docId cannot occur later in t2. */
            i++;
        }
        else
        {
            /* t2 is behind; its current docId cannot occur later in t1. */
            j++;
        }
    }

    /* NOTE(review): the square root of the dot product is kept from the original formula.
       A plain cosine similarity would use dSum directly -- confirm which is intended and
       how sqrtSumWeight is computed by the caller. */
    return Math.sqrt(dSum) / (t1.sqrtSumWeight * t2.sqrtSumWeight);
}
/**
 * Sparse term vector stored as two parallel arrays (document ids and weights),
 * together with a caller-supplied normalization value.
 */
public class TermVector {
    /** Document ids, in the order they appear in the parsed string. */
    public int[] docIds;
    /** Weights; {@code weights[i]} belongs to {@code docIds[i]}. */
    public double[] weights;
    /** Value passed to the constructor and stored unchanged. */
    public double sqrtSumWeight;
    /** Number of (docId, weight) entries. */
    public int size;

    /**
     * Parses a whitespace-separated list of {@code docId-weight} entries.
     *
     * <p>Each entry is split at the first {@code '-'} after its first character, so
     * weights that themselves contain a dash (negative values, or scientific notation
     * such as {@code 8.22E-4}) are parsed correctly. Runs of whitespace between entries
     * are tolerated, and a blank string yields an empty vector.
     *
     * @param sqrtSumWeight normalization value to store for this vector
     * @param str entries such as "60-0.000822 221-0.003378 230-0.001383 325-0.002527"
     * @throws IllegalArgumentException if an entry has no separator dash or its
     *         docId/weight is not a valid number ({@link NumberFormatException})
     */
    public TermVector(double sqrtSumWeight, String str)
    {
        this.sqrtSumWeight = sqrtSumWeight;

        String trimmed = str.trim();
        /* "".split(...) returns one empty token, so blank input is handled explicitly. */
        String[] vector = trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+");

        size = vector.length;
        docIds = new int[size];
        weights = new double[size];
        for (int i = 0; i < size; i++)
        {
            String entry = vector[i];
            /* Start at index 1 so a leading sign on the docId is not taken as the separator. */
            int sep = entry.indexOf('-', 1);
            if (sep < 0)
            {
                throw new IllegalArgumentException(
                        "Malformed entry (expected docId-weight): " + entry);
            }
            docIds[i] = Integer.parseInt(entry.substring(0, sep));
            weights[i] = Double.parseDouble(entry.substring(sep + 1));
        }
    }
}
/**
 * Calculates the correlation between two terms from their sparse document vectors.
 *
 * <p>Each term's vector is looked up in {@code termVectors}. A vector is a list of
 * (docId, weight) pairs, e.g. the serialized form
 * {@code "231-0.004167 422-0.005089 533-0.001410 1032-0.002660"}.
 *
 * <p>Precondition: the {@code docIds} of both vectors are sorted in ascending order.
 * The single-pass merge below relies on that ordering; it also assumes a docId
 * appears at most once per vector (TODO confirm against the code that builds the vectors).
 *
 * @param term1 index of the first term in {@code termVectors}
 * @param term2 index of the second term in {@code termVectors}
 * @return {@code sqrt(dot) / (t1.sqrtSumWeight * t2.sqrtSumWeight)}, where {@code dot}
 *         is the sum of {@code weight1 * weight2} over all docIds present in both vectors
 */
public static double CalcCorr(int term1, int term2)
{
    TermVector first = termVectors[term1];
    TermVector second = termVectors[term2];

    double dSum = 0;
    int p = 0;
    int q = 0;
    /* Two-pointer merge over the sorted docId arrays: linear in the combined length,
       rather than rescanning the second vector for every entry of the first. */
    while (p < first.size && q < second.size)
    {
        int leftId = first.docIds[p];
        int rightId = second.docIds[q];
        if (leftId < rightId)
        {
            /* The first vector is behind; skip its unmatched docId. */
            p++;
        }
        else if (leftId > rightId)
        {
            /* The second vector is behind; skip its unmatched docId. */
            q++;
        }
        else
        {
            /* Shared document: accumulate the product of the two weights. */
            dSum += first.weights[p] * second.weights[q];
            p++;
            q++;
        }
    }

    /* NOTE(review): the square root of the dot product is kept from the original formula.
       A plain cosine similarity would use dSum directly -- confirm which is intended and
       how sqrtSumWeight is computed by the caller. */
    return Math.sqrt(dSum) / (first.sqrtSumWeight * second.sqrtSumWeight);
}
/**
 * Sparse term vector stored as two parallel arrays (document ids and weights),
 * together with a caller-supplied normalization value.
 */
public class TermVector {
    /** Document ids, in the order they appear in the parsed string. */
    public int[] docIds;
    /** Weights; {@code weights[i]} belongs to {@code docIds[i]}. */
    public double[] weights;
    /** Value passed to the constructor and stored unchanged. */
    public double sqrtSumWeight;
    /** Number of (docId, weight) entries. */
    public int size;

    /**
     * Parses a whitespace-separated list of {@code docId-weight} entries.
     *
     * <p>Each entry is cut at the first {@code '-'} found after its first character, so
     * a weight containing its own dash (a negative value, or scientific notation such
     * as {@code 8.22E-4}) stays intact. Multiple spaces between entries are accepted,
     * and a blank string produces an empty vector.
     *
     * @param sqrtSumWeight normalization value to store for this vector
     * @param str entries such as "60-0.000822 221-0.003378 230-0.001383 325-0.002527"
     * @throws IllegalArgumentException if an entry has no separator dash or its
     *         docId/weight is not a valid number ({@link NumberFormatException})
     */
    public TermVector(double sqrtSumWeight, String str)
    {
        this.sqrtSumWeight = sqrtSumWeight;

        String text = str.trim();
        /* Splitting an empty string yields one empty token, so handle blank input first. */
        String[] entries = text.isEmpty() ? new String[0] : text.split("\\s+");

        size = entries.length;
        docIds = new int[size];
        weights = new double[size];
        for (int k = 0; k < size; k++)
        {
            String entry = entries[k];
            /* Search from index 1 so a sign in front of the docId is not the separator. */
            int dash = entry.indexOf('-', 1);
            if (dash < 0)
            {
                throw new IllegalArgumentException(
                        "Malformed entry (expected docId-weight): " + entry);
            }
            docIds[k] = Integer.parseInt(entry.substring(0, dash));
            weights[k] = Double.parseDouble(entry.substring(dash + 1));
        }
    }
}
作者: commonly 发布时间: 2011-12-20
nobody?
作者: commonly 发布时间: 2011-12-20
相关阅读 更多
热门阅读
-
office 2019专业增强版最新2021版激活秘钥/序列号/激活码推荐 附激活工具
阅读:74
-
如何安装mysql8.0
阅读:31
-
Word快速设置标题样式步骤详解
阅读:28
-
20+道必知必会的Vue面试题(附答案解析)
阅读:37
-
HTML如何制作表单
阅读:22
-
百词斩可以改天数吗?当然可以,4个步骤轻松修改天数!
阅读:31
-
ET文件格式和XLS格式文件之间如何转化?
阅读:24
-
react和vue的区别及优缺点是什么
阅读:121
-
支付宝人脸识别如何关闭?
阅读:21
-
腾讯微云怎么修改照片或视频备份路径?
阅读:28