发现长图案

一尘不染

发现长图案

algorithm

给定一个排序的数字列表，我想找到最长的子序列，其中连续元素之间的差异在几何上不断增加。所以如果列表是

 1, 2, 3, 4, 7, 15, 27, 30, 31, 81

那么子序列是1, 3, 7, 15, 31。或者，考虑1, 2, 5, 6, 11, 15, 23, 41, 47哪个子序列5, 11, 23, 47具有a = 3和k =2。

可以在O（n 2）时间内解决吗？其中n是列表的长度。

我对两种情况都比较感兴趣，在一般情况下，差异的进展是ak，ak 2，ak 3等，其中a和k都是整数，在
特殊情况下a = 1，所以差异的进展是k，k2，k 3等。

阅读 274

2020-07-28

共1个答案

一尘不染

更新资料

我对算法进行了改进，该算法平均取O（M +N ^ 2）和O（M + N）的内存需求。主要与以下描述的协议相同，但是为了计算ech差异D 的可能因子A，K ，我预装了一个表。对于M = 10 ^ 7，此表花费不到一秒钟的时间。

我做了一个不到10分钟即可解决N = 10 ^ 5个不同的随机整数元素的C实现。

这是C语言中的源代码：执行即可：gcc -O3 -o findgeo findgeo.c

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <memory.h>
#include <time.h>

struct Factor {
    int a;
    int k;
    struct Factor *next;
};

struct Factor *factors = 0;
int factorsL=0;

void ConstructFactors(int R) {
    int a,k,C;
    int R2;
    struct Factor *f;
    float seconds;
    clock_t end;
    clock_t start = clock();

    if (factors) free(factors);
    factors = malloc (sizeof(struct Factor) *((R>>1) + 1));
    R2 = R>>1 ;
    for (a=0;a<=R2;a++) {
        factors[a].a= a;
        factors[a].k=1;
        factors[a].next=NULL;
    }
    factorsL=R2+1;
    R2 = floor(sqrt(R));
    for (k=2; k<=R2; k++) {
        a=1;
        C=a*k*(k+1);
        while (C<R) {
            C >>= 1;
            f=malloc(sizeof(struct Factor));
            *f=factors[C];
            factors[C].a=a;
            factors[C].k=k;
            factors[C].next=f;
            a++;
            C=a*k*(k+1);
        }
    }

    end = clock();
    seconds = (float)(end - start) / CLOCKS_PER_SEC;
    printf("Construct Table: %f\n",seconds);
}

void DestructFactors() {
    int i;
    struct Factor *f;
    for (i=0;i<factorsL;i++) {
        while (factors[i].next) {
            f=factors[i].next->next;
            free(factors[i].next);
            factors[i].next=f;
        }
    }
    free(factors);
    factors=NULL;
    factorsL=0;
}

int ipow(int base, int exp)
{
    int result = 1;
    while (exp)
    {
        if (exp & 1)
            result *= base;
        exp >>= 1;
        base *= base;
    }

    return result;
}

void findGeo(int **bestSolution, int *bestSolutionL,int *Arr, int L) {
    int i,j,D;
    int mustExistToBeBetter;
    int R=Arr[L-1]-Arr[0];
    int *possibleSolution;
    int possibleSolutionL=0;
    int exp;
    int NextVal;
    int idx;
    int kMax,aMax;
    float seconds;
    clock_t end;
    clock_t start = clock();


    kMax = floor(sqrt(R));
    aMax = floor(R/2);
    ConstructFactors(R);
    *bestSolutionL=2;
    *bestSolution=malloc(0);

    possibleSolution = malloc(sizeof(int)*(R+1));

    struct Factor *f;
    int *H=malloc(sizeof(int)*(R+1));
    memset(H,0, sizeof(int)*(R+1));
    for (i=0;i<L;i++) {
        H[ Arr[i]-Arr[0] ]=1;
    }
    for (i=0; i<L-2;i++) {
        for (j=i+2; j<L; j++) {
            D=Arr[j]-Arr[i];
            if (D & 1) continue;
            f = factors + (D >>1);
            while (f) {
                idx=Arr[i] + f->a * f->k  - Arr[0];
                if ((f->k <= kMax)&& (f->a<aMax)&&(idx<=R)&&H[idx]) {
                    if (f->k ==1) {
                        mustExistToBeBetter = Arr[i] + f->a * (*bestSolutionL);
                    } else {
                        mustExistToBeBetter = Arr[i] + f->a * f->k * (ipow(f->k,*bestSolutionL) - 1)/(f->k-1);
                    }
                    if (mustExistToBeBetter< Arr[L-1]+1) {
                        idx=  floor(mustExistToBeBetter - Arr[0]);
                    } else {
                        idx = R+1;
                    }
                    if ((idx<=R)&&H[idx]) {
                        possibleSolution[0]=Arr[i];
                        possibleSolution[1]=Arr[i] + f->a*f->k;
                        possibleSolution[2]=Arr[j];
                        possibleSolutionL=3;
                        exp = f->k * f->k * f->k;
                        NextVal = Arr[j] + f->a * exp;
                        idx=NextVal - Arr[0];
                        while ( (idx<=R) && H[idx]) {
                            possibleSolution[possibleSolutionL]=NextVal;
                            possibleSolutionL++;
                            exp = exp * f->k;
                            NextVal = NextVal + f->a * exp;
                            idx=NextVal - Arr[0];
                        }

                        if (possibleSolutionL > *bestSolutionL) {
                            free(*bestSolution);
                            *bestSolution = possibleSolution;
                            possibleSolution = malloc(sizeof(int)*(R+1));
                            *bestSolutionL=possibleSolutionL;
                            kMax= floor( pow (R, 1/ (*bestSolutionL) ));
                            aMax= floor(R /  (*bestSolutionL));
                        }
                    }
                }
                f=f->next;
            }
        }
    }

    if (*bestSolutionL == 2) {
        free(*bestSolution);
        possibleSolutionL=0;
        for (i=0; (i<2)&&(i<L); i++ ) {
            possibleSolution[possibleSolutionL]=Arr[i];
            possibleSolutionL++;
        }
        *bestSolution = possibleSolution;
        *bestSolutionL=possibleSolutionL;
    } else {
        free(possibleSolution);
    }
    DestructFactors();
    free(H);

    end = clock();
    seconds = (float)(end - start) / CLOCKS_PER_SEC;
    printf("findGeo: %f\n",seconds);
}

int compareInt (const void * a, const void * b)
{
    return *(int *)a - *(int *)b;
}

int main(void) {
    int N=100000;
    int R=10000000;
    int *A = malloc(sizeof(int)*N);
    int *Sol;
    int SolL;
    int i;


    int *S=malloc(sizeof(int)*R);
    for (i=0;i<R;i++) S[i]=i+1;

    for (i=0;i<N;i++) {
        int r = rand() % (R-i);
        A[i]=S[r];
        S[r]=S[R-i-1];
    }

    free(S);
    qsort(A,N,sizeof(int),compareInt);

/*
    int step = floor(R/N);
    A[0]=1;
    for (i=1;i<N;i++) {
        A[i]=A[i-1]+step;
    }
*/

    findGeo(&Sol,&SolL,A,N);

    printf("[");
    for (i=0;i<SolL;i++) {
        if (i>0) printf(",");
        printf("%d",Sol[i]);
    }
    printf("]\n");
    printf("Size: %d\n",SolL);

    free(Sol);
    free(A);
    return EXIT_SUCCESS;
}

我将尝试证明，我提出的算法
O（N`2 + M）平均而言是均匀
分布的随机序列。我不是数学家，也不习惯做
这种演示，因此，请尽一切可能纠正我
看到的任何错误。

有4个缩进循环，两个第一个是N ^ 2因子。M用于
计算可能的因素表）。

每个循环平均仅执行一次第三循环。您可以看到
此检查了预先计算的因子表的大小。
当N-> inf时，大小为M。因此，每对平均步长为M / M = 1。

因此，证明恰好检查了第四循环。（遍历
完好的序列的序列对所有对的执行均小于或等于O（N ^ 2）。

为了证明这一点，我将考虑两种情况：一种是M >> N，另一种是
M〜=N。其中M是初始数组的最大差：M = S（n）-S（1）。

对于第一种情况，（M >> N）找到巧合的概率为p = N / M。要
开始一个序列，它必须与第二个元素和b + 1个元素重合，其中b是
到目前为止最佳序列的长度。因此循环将进入
N ^ 2 （N / M）^ 2时间。
该系列的平均长度（假设无穷级数）为p /（1-p）= N /（MN）。因此，
执行循环的总次数为N ^ 2 （N / M）^ 2 * N /（MN）。当
M >> N 时，它接近于0 。这里的问题是当M〜= N时。

现在让我们考虑M〜= N的情况。让我们认为b是
到目前为止的最佳序列长度。对于A = k = 1的情况，则序列必须
在Nb之前开始，因此序列数将为Nb，并且
循环所需的时间将最大为（Nb）* b。

对于A> 1和k = 1，我们可以推断到
（NA * b / d）* bd为M / N（
数字之间的平均距离）。如果我们将所有A的值相加（从1到dN / b），那么
我们看到的上限是：

\ sum_ {A = 1} ^ {dN / b} \ left（N- \ frac {Ab} {d} \ right）b = \ frac {N ^ 2d} {2}

对于k> = 2的情况，我们看到序列必须在之前开始
NA * k ^ b / d，因此循环将输入的
平均值，A * k ^ b / d）* b并将
所有从1到dN / k ^ b的A 加起来，它的极限为

\ sum_ {A = 1} ^ {dN / k ^ b} \ left（N- \ frac {Ak ^ b} {d} \ right）b = \ frac {bN ^ 2d} {2k ^ b}

在此，最坏的情况是当b最小时。因为我们正在考虑最小
序列，所以让我们考虑b = 2的最坏情况，因此
对于给定的k，第4个循环的通过次数将小于

\ frac {dN ^ 2} {k ^ 2} 。

如果我们将所有k从2加到无限，将是：

\ sum_ {k = 2} ^ {\ infty} \ frac {dN ^ 2} {k ^ 2} = dN ^ 2 \ left（\ frac {\ pi ^ 2} {6} -1 \ right）

因此，将k = 1和k> = 2的所有遍加在一起，我们得到的最大值为：

\ frac {N ^ 2d} {2} + N ^ 2d \ left（\ frac {\ pi ^ 2} {6} -1 \ right）= N ^ 2d \ left（\ frac {\ pi ^ 2} {6 }-\ frac {1} {2} \ right）\ simeq 1.45N ^ 2d

注意，d ＝ M / N ＝ 1 / p。

因此，我们有两个限制，一个是当d = 1 / p = M / N变为1
时变为无限，另一个是当d变为无限时变为无限。因此，我们的限制是
两者中的最小值，最坏的情况是两种情况都交叉时。因此，如果我们
求解方程：

N ^ 2d \ left（\ frac {\ pi ^ 2} {6}-\ frac {1} {2} \ right）= N ^ 2 \ left（\ frac {N} {M} \ right）^ 2 \ frac {N} {MN} = N ^ 2 \ left（\ frac {1} {d} \ right）^ 2 \ frac {1} {d-1}

我们看到最大值是在d = 1.353时

因此证明了第四循环总共将被处理少于1.55N ^ 2 次。

当然，这是一般情况。在最坏的情况下，我无法找到一种方法来生成其第四级循环高于O（N ^ 2）的级数，并且我坚信它们不存在，但是我不是数学家来证明这一点。

旧答案

这是O（（n ^ 2）* cube_root（M））的平均值的解决方案，其中M是数组的第一个元素与最后一个元素之间的差。并且内存需求为O（M + N）。

1.-构造一个长度为M的数组H，以使M [i-S [0]] = true（如果i存在于
初始数组中），而返回false（如果不存在）。

2.-对于数组S [j]，S [i]中的每一对，执行以下操作：

2.1检查它是否可能是解决方案的第一和第三要素。为此
，计算满足方程S（i）= S（j）+AK + AK ^ 2的所有可能的A，K对。检查此SO
问题以查看如何解决此问题。并检查是否存在第二个元素：S [i] + A * K

2.2还应检查是否存在元素比我们拥有的最佳解决方案更进一步。例如，如果到目前为止我们拥有的最佳解决方案是4个元素长，那么请检查是否存在元素A [j] + A K + A K ^ 2+ A K ^ 3 + A K ^ 4

2.3如果2.1和2.2是正确的，则迭代此系列多长时间，并将其设置为bestSolution，直到现在为止的时间比上一个更长。

这是javascript中的代码：

function getAKs(A) {
    if (A / 2 != Math.floor(A / 2)) return [];
    var solution = [];
    var i;
    var SR3 = Math.pow(A, 1 / 3);
    for (i = 1; i <= SR3; i++) {
        var B, C;
        C = i;
        B = A / (C * (C + 1));
        if (B == Math.floor(B)) {
            solution.push([B, C]);
        }

        B = i;
        C = (-1 + Math.sqrt(1 + 4 * A / B)) / 2;
        if (C == Math.floor(C)) {
            solution.push([B, C]);
        }
    }

    return solution;
}

function getBestGeometricSequence(S) {
    var i, j, k;

    var bestSolution = [];

    var H = Array(S[S.length-1]-S[0]);
    for (i = 0; i < S.length; i++) H[S[i] - S[0]] = true;

    for (i = 0; i < S.length; i++) {
        for (j = 0; j < i; j++) {
            var PossibleAKs = getAKs(S[i] - S[j]);
            for (k = 0; k < PossibleAKs.length; k++) {
                var A = PossibleAKs[k][0];
                var K = PossibleAKs[k][17];

                var mustExistToBeBetter;
                if (K==1) {
                    mustExistToBeBetter = S[j] + A * bestSolution.length;
                } else {
                    mustExistToBeBetter = S[j] + A * K * (Math.pow(K,bestSolution.length) - 1)/(K-1);
                }

                if ((H[S[j] + A * K - S[0]]) && (H[mustExistToBeBetter - S[0]])) {
                    var possibleSolution=[S[j],S[j] + A * K,S[i]];
                    exp = K * K * K;
                    var NextVal = S[i] + A * exp;
                    while (H[NextVal - S[0]] === true) {
                        possibleSolution.push(NextVal);
                        exp = exp * K;
                        NextVal = NextVal + A * exp;
                    }

                    if (possibleSolution.length > bestSolution.length) {
                        bestSolution = possibleSolution;
                    }
                }
            }
        }
    }
    return bestSolution;
}

//var A= [ 1, 2, 3,5,7, 15, 27, 30,31, 81];
var A=[];
for (i=1;i<=3000;i++) {
    A.push(i);
}
var sol=getBestGeometricSequence(A);

$("#result").html(JSON.stringify(sol));

您可以在此处检查代码：http : //jsfiddle.net/6yHyR/1/

我之所以维持另一种解决方案，是因为我相信当M比N大时，它仍然更好。

2020-07-28