一尘不染

用于字符串数组的delphi mergesort

algorithm

http://www.explainth.at/zh-CN/delphi/dsort.shtml上找到了此编码的mergesort(站点关闭,但尝试使用Wayback机器或以下站点:http
//read.pudn.com/downloads192/sourcecode/delphi_control/901147
/Sorts.pas__.htm),但本质上定义的数组不适用于字符串数组。
type TSortArray = array[0..8191] of Double;
我想传递一个字符串数组,该字符串数组可能会消除重复项(这将是Union?),
并在可能的情况下保留原始顺序,以便以后将其恢复为原始索引位置减去课程的重复项(原始索引),
以便可以将数组传递回去。进行进一步处理。我正在使用包含数百万个字符串(14至3000万个)的非常大的字符串文件,因此TStringList不是一个选择。这些大文件的最佳选择是使用字符串数组或记录数组(或者可能是单个链接列表?),并使用针对大量数据的稳定算法进行排序。
  1. 我如何更改它以获取字符串数组?
  2. 如何进一步修改以删除或至少标记重复项?
  3. 是否可以存储原始索引号以将字符串放回原始位置?
  4. 与单个链接列表相比,字符串数组或记录数组适合大量字符串吗?

按重要性的顺序列出问题,因此,如果仅回答问题1,就可以了。预先感谢您的所有投入。


procedure MergeSort(var Vals:TSortArray;ACount:Integer);
var AVals:TSortArray;

  procedure Merge(ALo,AMid,AHi:Integer);
  var i,j,k,m:Integer;
  begin
    i:=0;
    for j:=ALo to AMid do
    begin
      AVals[i]:=Vals[j];
      inc(i);
      //copy lower half or Vals into temporary array AVals
    end;

    i:=0;j:=AMid + 1;k:=ALo;//j could be undefined after the for loop!
    while ((k < j) and (j <= AHi)) do
    if (AVals[i] < Vals[j]) then
    begin
      Vals[k]:=AVals[i];
      inc(i);inc(k);
    end else
    begin
      Vals[k]:=Vals[j];
      inc(k);inc(j);
    end;
    {locate next greatest value in Vals or AVals and copy it to the
     right position.}

    for m:=k to j - 1 do
    begin
      Vals[m]:=AVals[i];
      inc(i);
    end;
    //copy back any remaining, unsorted, elements
  end;

  procedure PerformMergeSort(ALo,AHi:Integer);
  var AMid:Integer;
  begin
    if (ALo < AHi) then
    begin
      AMid:=(ALo + AHi) shr 1;
      PerformMergeSort(ALo,AMid);
      PerformMergeSort(AMid + 1,AHi);
      Merge(ALo,AMid,AHi);   <==== passing the array as string causes AV breakdown here
    end;
  end;

begin
  SetLength(AVals, ACount);
  PerformMergeSort(0,ACount - 1);
end;

阅读 243

收藏
2020-07-28

共1个答案

一尘不染

回答第二个问题:带有重复删除的Mergesort修改。应该适用于字符串。

//returns new valid length
function MergeSortRemoveDuplicates(var Vals: array of Integer):Integer;
var
  AVals: array of Integer;

   //returns index of the last valid element
  function Merge(I0, I1, J0, J1: Integer):Integer;
  var
    i, j, k, LC:Integer;
  begin
    LC := I1 - I0;
    for i := 0 to LC do
      AVals[i]:=Vals[i + I0];
      //copy lower half or Vals into temporary array AVals

    k := I0;
    i := 0;
    j := J0;
    while ((i <= LC) and (j <= J1)) do
    if (AVals[i] < Vals[j]) then begin
      Vals[k] := AVals[i];
      inc(i);
      inc(k);
    end else  if (AVals[i] > Vals[j]) then begin
      Vals[k]:=Vals[j];
      inc(k);
      inc(j);
    end else begin //duplicate
      Vals[k] := AVals[i];
      inc(i);
      inc(j);
      inc(k);
    end;

    //copy the rest
    while i <= LC do begin
      Vals[k] := AVals[i];
      inc(i);
      inc(k);
    end;

    if k <> j then
      while j <= J1 do begin
        Vals[k]:=Vals[j];
        inc(k);
        inc(j);
      end;

    Result := k - 1;
  end;

 //returns index of the last valid element

  function PerformMergeSort(ALo, AHi:Integer): Integer; //returns
  var
    AMid, I1, J1:Integer;
  begin

  //It would be wise to use Insertion Sort when (AHi - ALo) is small (about 32-100)
    if (ALo < AHi) then
    begin
      AMid:=(ALo + AHi) shr 1;
      I1 := PerformMergeSort(ALo, AMid);
      J1 := PerformMergeSort(AMid + 1, AHi);
      Result := Merge(ALo, I1, AMid + 1, J1);
    end else
      Result := ALo;
  end;

begin
  SetLength(AVals, Length(Vals) div 2 + 1);
  Result := 1 + PerformMergeSort(0, High(Vals));
end;


//short test
var
  A: array of Integer;
  i, NewLen: Integer;
begin
  Randomize;
  SetLength(A, 12);
  for i := 0 to High(A) do
    A[i] := Random(10);
   NewLen := MergeSortRemoveDuplicates(A);
   SetLength(A, NewLen);
   for i := 0 to High(A) do
     Memo1.Lines.Add(IntToStr(A[i]))
  end;

字符串的简单修改:

function MergeSortRemoveDuplicates(var Vals: array of String):Integer;
var
  AVals: array of String;

和测试用例:

var
  List: TStringList;
  Arr: array of string;
  i, n: Integer;
begin
  with TStringList.Create do try
    LoadFromFile('F:\m2.txt'); //contains some equal strings
    SetLength(Arr, Count);
    for i := 0 to Count - 1 do
      Arr[i] := Strings[i];
  finally
    Free
  end;
  n := MergeSortRemoveDuplicates(Arr);
  for i := 0 to n - 1 do
    Memo1.Lines.Add(Arr[i]);
end;
2020-07-28