Java 类weka.core.NormalizableDistance 实例源码

项目:JCLAL    文件:AcumulativeDistanceContainer.java   
/**
 * Constructor by default.
 *
 * @param objectiveColumns The objetive colums
 * @param rows The rows
 * @param distanceFunction The distance function used to calculate the
 * similarity
 */
public AcumulativeDistanceContainer(Instances objectiveColumns, Instances rows,
        NormalizableDistance distanceFunction) {
    this.distanceFunction = distanceFunction;

    indexesChanges = new int[rows.numInstances()];
    for (int i = 0; i < indexesChanges.length; i++) {
        indexesChanges[i] = i;
    }
    size = indexesChanges.length;

    acumulativeValue = new double[rows.numInstances()];
    for (int i = 0; i < acumulativeValue.length; i++) {
        for (int j = 0; j < objectiveColumns.numInstances(); j++) {
            acumulativeValue[i] += distanceFunction.distance(rows.instance(i),
                    objectiveColumns.instance(j));
        }
    }
}
项目:JCLAL    文件:DensityDiversityQueryStrategy.java   
/**
 * Creates the query strategy with the given distance function and marks the
 * strategy as non-maximal.
 *
 * @param typeOfDistance
 *            The distance function to be used by this strategy
 */
public DensityDiversityQueryStrategy(NormalizableDistance typeOfDistance) {
    super();
    setMaximal(false);
    this.typeOfDistance = typeOfDistance;
}
项目:repo.kmeanspp.silhouette_score    文件:KMeansReduceTask.java   
/**
 * Utility function to examine the attribute ranges in a bunch of distance
 * functions and return a two instance dataset with the global mins/maxes of
 * numeric attributes set. This can be used to "prime" a distance function.
 * Non-numeric attributes are set to missing in both instances.
 * 
 * @param distanceFuncs a list of distance functions (where each potentially
 *          has only seen part of the overall dataset)
 * @param headerNoSummary the header of the data that the distance functions
 *          have been seeing
 * @return a priming data set with global min and max values for numeric
 *         attributes; the first instance holds the minimums, the second the
 *         maximums
 * @throws DistributedWekaException if a problem occurs
 */
public static Instances computeDistancePrimingDataFromDistanceFunctions(
  List<NormalizableDistance> distanceFuncs, Instances headerNoSummary)
  throws DistributedWekaException {

  final int numAtts = headerNoSummary.numAttributes();
  double[] mins = new double[numAtts];
  double[] maxes = new double[numAtts];
  try {
    for (int i = 0; i < numAtts; i++) {
      if (headerNoSummary.attribute(i).isNumeric()) {
        mins[i] = Double.MAX_VALUE;
        // Double.MIN_VALUE is the smallest *positive* double, so using it
        // as the starting maximum would give a wrong result for attributes
        // whose values are all negative or zero. Start from the most
        // negative finite value instead.
        maxes[i] = -Double.MAX_VALUE;
      } else {
        mins[i] = Utils.missingValue();
        maxes[i] = Utils.missingValue();
      }
    }
    for (NormalizableDistance d : distanceFuncs) {
      double[][] ranges = d.getRanges();
      for (int i = 0; i < numAtts; i++) {
        if (ranges[i][NormalizableDistance.R_MIN] < mins[i]) {
          mins[i] = ranges[i][NormalizableDistance.R_MIN];
        }
        if (ranges[i][NormalizableDistance.R_MAX] > maxes[i]) {
          maxes[i] = ranges[i][NormalizableDistance.R_MAX];
        }
      }
    }
  } catch (Exception ex) {
    throw new DistributedWekaException(ex);
  }

  Instances prime = new Instances(headerNoSummary, 2);
  prime.add(new DenseInstance(1.0, mins));
  prime.add(new DenseInstance(1.0, maxes));

  return prime;
}
项目:repo.kmeanspp.silhouette_score    文件:MedianOfWidestDimension.java   
/**
 * Finds the dimension in which the points of a node are spread the widest.
 * When m_NormalizeDimWidths is set, each width is first divided by the width
 * of the same dimension in the whole point-space. The class attribute is
 * never selected.
 * 
 * @param nodeRanges The attributes' range of the points inside the node that
 *          is to be split.
 * @param universe The attributes' range for the whole point-space.
 * @return The index of the attribute/dimension with the widest spread, or -1
 *         if no non-class dimension has a width greater than zero.
 */
protected int widestDim(double[][] nodeRanges, double[][] universe) {
  final int classIdx = m_Instances.classIndex();
  int bestDim = -1;
  double bestWidth = 0.0;
  for (int dim = 0; dim < nodeRanges.length; dim++) {
    double width = nodeRanges[dim][NormalizableDistance.R_WIDTH];
    if (m_NormalizeDimWidths) {
      width /= universe[dim][NormalizableDistance.R_WIDTH];
    }
    // the class attribute is skipped even if it is the widest
    if (dim != classIdx && width > bestWidth) {
      bestWidth = width;
      bestDim = dim;
    }
  }
  return bestDim;
}
项目:umple    文件:MedianOfWidestDimension.java   
/**
 * Returns the index of the dimension with the largest width among the points
 * of a node. If m_NormalizeDimWidths is true, the width of each dimension is
 * expressed relative to the width of that dimension in the whole
 * point-space. The class attribute is excluded from the search.
 * 
 * @param nodeRanges The attributes' range of the points inside the node that
 *          is to be split.
 * @param universe The attributes' range for the whole point-space.
 * @return The index of the widest attribute/dimension, or -1 when no
 *         non-class dimension has a width above zero.
 */
protected int widestDim(double[][] nodeRanges, double[][] universe) {
  final int classIdx = m_Instances.classIndex();
  final boolean normalize = m_NormalizeDimWidths;
  double largest = 0.0;
  int result = -1;
  for (int d = 0; d < nodeRanges.length; d++) {
    final double candidate = normalize
      ? nodeRanges[d][NormalizableDistance.R_WIDTH]
        / universe[d][NormalizableDistance.R_WIDTH]
      : nodeRanges[d][NormalizableDistance.R_WIDTH];
    if (candidate <= largest || d == classIdx) {
      // NaN candidates (0/0) fall through to the check below and are
      // rejected there, exactly like a failed "greater than" test
      if (!(candidate > largest) || d == classIdx) {
        continue;
      }
    }
    if (candidate > largest) {
      largest = candidate;
      result = d;
    }
  }
  return result;
}
项目:repo.kmeanspp.silhouette_score    文件:KMeansClustererHadoopJob.java   
/**
 * Assembles the final clusterer from the results of the map-reduce k-means
 * runs: a PreconstructedKMeans holding the centroids and statistics of the
 * best run, wrapped in a PreconstructedFilteredClusterer when a
 * pre-processing filter is supplied.
 * 
 * @param best the best result from the runs of k-means that were performed in
 *          parallel
 * @param preprocess any pre-processing filters applied (may be null)
 * @param initialStartingPoints the initial starting centroids (may be null)
 * @param finalNumIterations the final number of iterations performed
 * @return a final clusterer object
 * @throws DistributedWekaException if a problem occurs
 */
protected Clusterer makeFinalClusterer(KMeansReduceTask best,
  Filter preprocess, Instances initialStartingPoints, int finalNumIterations)
  throws DistributedWekaException {

  PreconstructedKMeans kMeans = new PreconstructedKMeans();

  // The priming data is in the transformed space when preprocessing
  // filters are in use
  Instances primingData = best.getGlobalDistanceFunctionPrimingData();
  NormalizableDistance primedDistance = new EuclideanDistance();
  primedDistance.setInstances(primingData);

  kMeans.setClusterCentroids(best.getCentroidsForRun());
  kMeans.setFinalNumberOfIterations(finalNumIterations + 1);
  if (initialStartingPoints != null) {
    kMeans.setInitialStartingPoints(initialStartingPoints);
  }

  try {
    kMeans.setDistanceFunction(primedDistance);
    kMeans.setClusterStats(best.getAggregatedCentroidSummaries());
  } catch (Exception ex) {
    throw new DistributedWekaException(ex);
  }

  if (!getInitWithRandomCentroids()) {
    SelectedTag plusPlus = new SelectedTag(SimpleKMeans.KMEANS_PLUS_PLUS,
      SimpleKMeans.TAGS_SELECTION);
    kMeans.setInitializationMethod(plusPlus);
  }

  kMeans.setDisplayStdDevs(getDisplayCentroidStdDevs());

  if (preprocess == null) {
    return kMeans;
  }

  PreconstructedFilteredClusterer filtered =
    new PreconstructedFilteredClusterer();
  filtered.setFilter(preprocess);
  filtered.setClusterer(kMeans);
  return filtered;
}
项目:repo.kmeanspp.silhouette_score    文件:CanopyMapTask.java   
/**
 * Get the distance function held by this task.
 *
 * @return the distance function
 */
public NormalizableDistance getDistanceFunction() {
  return this.m_distanceFunction;
}
项目:repo.kmeanspp.silhouette_score    文件:CanopyReduceTask.java   
/**
 * Primes the final distance function with the overall attribute ranges seen
 * by the distance functions of the individual Canopy clusterers. This is
 * used when more than just a missing values filter is in play, because the
 * min/max info in the global attribute summary is then not applicable
 * (filters might transform or create attributes for which the global ARFF
 * header holds no summary information).
 * 
 * @param clist the list of individual Canopy clusterers; each element is
 *          cast to ECanopy and the list must not be empty
 * @param finalDistance the distance function to initialize
 * @throws Exception if a problem occurs
 */
protected void initFinalDistanceFunctionFiltersInPlay(List<Canopy> clist,
  NormalizableDistance finalDistance) throws Exception {

  ECanopy firstCanopy = (ECanopy) clist.get(0);
  Instances primingData =
    new Instances(firstCanopy.getDistanceFunction().getInstances(), 0);
  final int numAtts = primingData.numAttributes();

  double[] overallMax = new double[numAtts];
  double[] overallMin = new double[numAtts];

  // seed the overall ranges from the first canopy
  double[][] canopyRanges = firstCanopy.getDistanceFunction().getRanges();
  for (int att = 0; att < numAtts; att++) {
    overallMin[att] = canopyRanges[att][NormalizableDistance.R_MIN];
    overallMax[att] = canopyRanges[att][NormalizableDistance.R_MAX];
  }

  // widen them with the ranges of the remaining canopies
  for (int c = 1; c < clist.size(); c++) {
    canopyRanges = ((ECanopy) clist.get(c)).getDistanceFunction().getRanges();
    for (int att = 0; att < numAtts; att++) {
      final double low = canopyRanges[att][NormalizableDistance.R_MIN];
      if (low < overallMin[att]) {
        overallMin[att] = low;
      }

      final double high = canopyRanges[att][NormalizableDistance.R_MAX];
      if (high > overallMax[att]) {
        overallMax[att] = high;
      }
    }
  }

  // ranges are irrelevant for nominal attributes, so mark them missing
  for (int att = 0; att < numAtts; att++) {
    if (!primingData.attribute(att).isNominal()) {
      continue;
    }
    overallMin[att] = Utils.missingValue();
    overallMax[att] = Utils.missingValue();
  }

  primingData.add(new DenseInstance(1.0, overallMin));
  primingData.add(new DenseInstance(1.0, overallMax));

  finalDistance.setInstances(primingData);
}
项目:JCLAL    文件:MultiLabelDensityDiversityQueryStrategy.java   
/**
 * Creates the multi-label query strategy with the given distance function
 * and marks the strategy as non-maximal.
 * 
 * @param typeOfDistance
 *            The distance function to be used by this strategy
 */
public MultiLabelDensityDiversityQueryStrategy(NormalizableDistance typeOfDistance) {
    super();
    this.setMaximal(false);
    this.setTypeOfDistance(typeOfDistance);
}
项目:JCLAL    文件:KNearestDistanceContainer.java   
/**
 * Constructor
 * 
 * @param instances
 *            The instances
 * @param distanceFunction
 *            The distance function to be used
 * @param k
 *            The number of k nearest neighbors
 */
public KNearestDistanceContainer(Instances instances, NormalizableDistance distanceFunction, int k) {

    super(instances, distanceFunction);

    accumulativeDistanceKNearest = new double[size];
    kNearest = new HashSet[size];
    this.k = k;

    // Compute the k-nearest neighbors of each instance

    for (int i = 0; i < size; i++) {

        ArrayList<Container> array = new ArrayList<Container>(k);
        kNearest[i] = new HashSet<Integer>();

        for (int j = 0; j < size; j++) {

            if (i != j)
                array.add(new Container<Integer>(getDistance(i, j), j));

        }

        OrderUtils.mergeSort(array, false);

        // Fill the distances of the k-nearest neighbors of i

        for (int pos = 0; pos < k; pos++) {
            accumulativeDistanceKNearest[i] += array.get(pos).getKey();
            kNearest[i].add(Integer.parseInt(array.get(pos).getValue().toString()));
        }
    }
}
项目:repo.kmeanspp.silhouette_score    文件:CentroidSketch.java   
/**
 * Creates a sketch from an initial set of points and sets up the weighted
 * reservoir sample with the given size and seed.
 * 
 * @param initialSketch the initial starting point (typically one randomly
 *          chosen instance for the k-means|| algorithm)
 * @param distanceFunction the distance function to use
 * @param size the size of the reservoir (i.e. how many points to consider
 *          adding to the sketch at each iteration)
 * @param seed the seed for random number generation
 */
public CentroidSketch(Instances initialSketch,
  NormalizableDistance distanceFunction, int size, int seed) {
  this.m_size = size;
  this.m_seed = seed;
  this.m_distanceFunction = distanceFunction;
  this.m_currentSketch = initialSketch;

  this.m_weightedCenterSample =
    new WeightedReservoirSample(this.m_size, this.m_seed);
}
项目:JCLAL    文件:DensityDiversityQueryStrategy.java   
/**
 * Returns the distance function currently configured for this strategy.
 *
 * @return The distance function in use.
 */
public NormalizableDistance getTypeOfDistance() {
    return this.typeOfDistance;
}
项目:JCLAL    文件:DensityDiversityQueryStrategy.java   
/**
 * Sets the distance function to use. The given object is stored as is; no
 * copy is made.
 *
 * @param typeOfDistance
 *            The distance function to be used by this strategy.
 */
public void setTypeOfDistance(NormalizableDistance typeOfDistance) {
    this.typeOfDistance = typeOfDistance;
}
项目:JCLAL    文件:MultiLabelDensityDiversityQueryStrategy.java   
/**
 * Returns the distance function currently configured for this multi-label
 * strategy.
 *
 * @return The distance function in use.
 */
public NormalizableDistance getTypeOfDistance() {
    return this.typeOfDistance;
}
项目:JCLAL    文件:MultiLabelDensityDiversityQueryStrategy.java   
/**
 * Sets the distance function to use. The given object is stored as is; no
 * copy is made. This setter is private, so it is only reachable from within
 * the class itself.
 *
 * @param typeOfDistance
 *            The distance function to be used by this strategy.
 */
private void setTypeOfDistance(NormalizableDistance typeOfDistance) {

    this.typeOfDistance = typeOfDistance;
}
项目:JCLAL    文件:DistanceContainer.java   
/**
 * Builds the container by computing the distance between every pair of
 * distinct instances once. Row i stores only the distances from instance i
 * to the instances that come after it, and the smallest and largest
 * distances seen are tracked before scaleMinMax() is invoked.
 *
 * @param instances
 *            The set of instances
 * @param distanceFunction
 *            The distance function used to calculate the distance between
 *            two instances
 */
public DistanceContainer(Instances instances, NormalizableDistance distanceFunction) {

    size = instances.numInstances();
    numAttributes = instances.numAttributes();

    indexesChanges = new int[size];
    acumulativeValue = new double[size];

    final int lastIndex = size - 1;

    distance = new double[lastIndex][];

    maxDistance = Double.MIN_VALUE;
    minDistance = Double.MAX_VALUE;

    for (int row = 0; row < lastIndex; row++) {

        // Row "row" holds one entry per instance that follows it
        distance[row] = new double[lastIndex - row];

        // At the beginning every index maps to itself
        indexesChanges[row] = row;

        for (int col = row + 1; col < size; col++) {

            final double current = distanceFunction.distance(instances.instance(row),
                    instances.instance(col));

            if (current > maxDistance) {
                maxDistance = current;
            }

            if (current < minDistance) {
                minDistance = current;
            }

            // The column offset is relative to the first instance after "row"
            setStoreDistance(row, col - row - 1, current);
        }
    }

    indexesChanges[lastIndex] = lastIndex;

    scaleMinMax();
}
项目:JCLAL    文件:DistanceContainer.java   
/**
 * Builds the container by computing the distance between every pair of
 * distinct instances once, keeping the values either in a Matrix object or
 * in an in-memory triangular array depending on matrixOverFile. The
 * smallest and largest distances seen are tracked before scaleMinMax() is
 * invoked.
 *
 * @param instances
 *            dataset
 * @param distanceFunction
 *            The distance function used to calculate the distance
 * @param matrixOverFile
 *            Whether the matrix will be stored into a file
 * @throws java.lang.Exception
 *             The exception that will be launched
 */
public DistanceContainer(Instances instances, NormalizableDistance distanceFunction, boolean matrixOverFile)
        throws Exception {

    this.matrixOverFile = matrixOverFile;

    size = instances.numInstances();
    indexesChanges = new int[size];
    acumulativeValue = new double[size];
    numAttributes = instances.numAttributes();

    final int lastIndex = size - 1;

    if (!matrixOverFile) {
        distance = new double[lastIndex][];
    } else {
        distanceMatrix = new Matrix(size, size, true);
    }

    maxDistance = Double.MIN_VALUE;
    minDistance = Double.MAX_VALUE;

    for (int row = 0; row < lastIndex; row++) {

        // The in-memory rows only hold the instances that follow "row"
        if (!matrixOverFile) {
            distance[row] = new double[lastIndex - row];
        }

        // At the beginning every index maps to itself
        indexesChanges[row] = row;

        for (int col = row + 1; col < size; col++) {

            final double current = distanceFunction.distance(instances.instance(row),
                    instances.instance(col));

            if (current > maxDistance) {
                maxDistance = current;
            }

            if (current < minDistance) {
                minDistance = current;
            }

            // The column offset is relative to the first instance after "row"
            setStoreDistance(row, col - row - 1, current);
        }
    }

    indexesChanges[lastIndex] = lastIndex;

    scaleMinMax();
}
项目:repo.kmeanspp.silhouette_score    文件:CentroidSketch.java   
/**
 * Returns the distance function held by this sketch.
 * 
 * @return the distance function
 */
public NormalizableDistance getDistanceFunction() {
  return this.m_distanceFunction;
}
项目:repo.kmeanspp.silhouette_score    文件:CentroidSketch.java   
/**
 * Replaces the distance function held by this sketch.
 * 
 * @param distFunc the distance function to use
 */
public void setDistanceFunction(NormalizableDistance distFunc) {
  this.m_distanceFunction = distFunc;
}
项目:repo.kmeanspp.silhouette_score    文件:KMeansMapTask.java   
/**
 * Returns the distance function held by this task.
 *
 * @return the distance function in use
 */
public NormalizableDistance getDistanceFunction() {
  return this.m_distanceFunction;
}
项目:JCLAL    文件:KNearestDistanceContainer.java   
/**
 * Constructor. Delegates entirely to the superclass constructor that takes
 * the same three arguments.
 * 
 * NOTE(review): unlike the constructor that receives k, this one does not
 * set up accumulativeDistanceKNearest, kNearest or k - confirm that callers
 * of this constructor do not rely on them.
 * 
 * @param instances
 *            The instances
 * @param distanceFunction
 *            The distance function to be used
 * @param matrixOverFile
 *            It indicates whether the matrix is stored over a file or the
 *            main memory
 * @throws Exception
 *             If the superclass constructor fails.
 */
public KNearestDistanceContainer(Instances instances, NormalizableDistance distanceFunction, boolean matrixOverFile)
        throws Exception {

    super(instances, distanceFunction, matrixOverFile);

}