diff options
author | Jean-Pierre Ledure <jp@ledure.be> | 2023-01-21 16:43:39 +0100 |
---|---|---|
committer | Jean-Pierre Ledure <jp@ledure.be> | 2023-01-21 16:48:19 +0000 |
commit | ac21a5ee9b51ab771736a2695426196f2f663c34 (patch) | |
tree | e8e520afac4671e6446d8243f8369b2dbdff8dc2 /wizards | |
parent | c97441a5ee8a86a37fac0b506d0af0c2ae6d51fb (diff) |
ScriptForge (SF_Calc) new RemoveDuplicates() method
Remove duplicate values from a range of values.
The comparison between rows is done on a subset
of the columns in the range.
The resulting range replaces the input range,
in which, either:
all duplicate rows are cleared from their content
all duplicate rows are suppressed
and rows below are pushed upwards.
Anyway, the first copy of each set of duplicates
is kept and the initial sequence is preserved.
Arguments of the method:
Range: the range, as a string,
from which the duplicate rows should be removed
Columns: an array of column numbers to compare;
items are in the interval [1 .. range width]
Default = the first column in the range
Header: when True, the first row is a header row.
Default = False.
CaseSensitive: for string comparisons.
Default = False.
Mode: either "CLEAR" or "COMPACT" (Default)
For large ranges, the "COMPACT" mode
is probably significantly slower.
The method has been implemented for Basic
and Python user scripts.
The calc.xhp help page should be updated accordingly.
Change-Id: I352b2f3da98974d9482575850550cec4e27d2e01
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/145954
Tested-by: Jean-Pierre Ledure <jp@ledure.be>
Reviewed-by: Jean-Pierre Ledure <jp@ledure.be>
Tested-by: Jenkins
Diffstat (limited to 'wizards')
-rw-r--r-- | wizards/source/scriptforge/python/scriptforge.py | 3 | ||||
-rw-r--r-- | wizards/source/sfdocuments/SF_Calc.xba | 126 |
2 files changed, 129 insertions, 0 deletions
diff --git a/wizards/source/scriptforge/python/scriptforge.py b/wizards/source/scriptforge/python/scriptforge.py index a15261caed58..f368198be7e2 100644 --- a/wizards/source/scriptforge/python/scriptforge.py +++ b/wizards/source/scriptforge/python/scriptforge.py @@ -2263,6 +2263,9 @@ class SFDocuments: def PrintOut(self, sheetname = '~', pages = '', copies = 1): return self.ExecMethod(self.vbMethod, 'PrintOut', sheetname, pages, copies) + def RemoveDuplicates(self, range, columns = 1, header = False, casesensitive = False, mode = 'COMPACT'): + return self.ExecMethod(self.vbMethod, 'RemoveDuplicates', range, columns, header, casesensitive, mode) + def RemoveSheet(self, sheetname): return self.ExecMethod(self.vbMethod, 'RemoveSheet', sheetname) diff --git a/wizards/source/sfdocuments/SF_Calc.xba b/wizards/source/sfdocuments/SF_Calc.xba index f2c9fc34e2f2..a6dce33f9d6a 100644 --- a/wizards/source/sfdocuments/SF_Calc.xba +++ b/wizards/source/sfdocuments/SF_Calc.xba @@ -2111,6 +2111,7 @@ Public Function Methods() As Variant , "OpenRangeSelector" _ , "Printf" _ , "PrintOut" _ + , "RemoveDuplicates" _ , "RemoveSheet" _ , "RenameSheet" _ , "SetArray" _ @@ -2583,6 +2584,131 @@ Public Function Properties() As Variant End Function ' SFDocuments.SF_Calc.Properties REM ----------------------------------------------------------------------------- +Public Function RemoveDuplicates(Optional ByVal Range As Variant _ + , Optional ByVal Columns As Variant _ + , Optional ByVal Header As Variant _ + , Optional ByVal CaseSensitive As Variant _ + , Optional ByVal Mode As Variant _ + ) As String +''' Remove duplicate values from a range of values. +''' The comparison between rows is done on a subset of the columns in the range. +''' The resulting range replaces the input range, in which, either: +''' all duplicate rows are cleared from their content +''' all duplicate rows are suppressed and rows below are pushed upwards. +''' Anyway, the first copy of each set of duplicates is kept and the initial sequence is preserved. +''' Args: +''' Range: the range, as a string, from which the duplicate rows should be removed +''' Columns: an array of column numbers to compare; items are in the interval [1 .. range width] +''' Default = the first column in the range +''' Header: when True, the first row is a header row. Default = False. +''' CaseSensitive: for string comparisons. Default = False. +''' Mode: either "CLEAR" or "COMPACT" (Default) +''' For large ranges, the "COMPACT" mode is probably significantly slower. +''' Returns: +''' The resulting range as a string +''' Examples: +''' oCalc.RemoveDuplicates("Sheet1.B2:K11", Array(1, 2), Header := True, CaseSensitive := True) + +Dim sRemove As String ' Return value +Dim oRangeAddress As Object ' Parsed range as an _Address object +Dim sMirrorRange As String ' Mirror of initial range +Dim lRandom As Long ' Random number to build the worksheet name +Dim sWorkSheet As String ' Name of worksheet +Dim vRows() As Variant ' Array of row numbers +Dim sRowsRange As String ' Range of the last column of the worksheet +Dim sFullMirrorRange As String ' Mirrored data + rows column +Dim sLastRowsRange As String ' Same as sRowsRange without the first cell +Dim sDuplicates As String ' Formula identifying a duplicate row +Dim lColumn As Long ' Single column number +Dim sColumn As String ' Single column name +Dim sFilter As String ' Filter formula for final compaction or clearing + +Const cstThisSub = "SFDocuments.Calc.RemoveDuplicates" +Const cstSubArgs = "Range, [Columns], [Header=False], [CaseSensitive=False], [Mode=""COMPACT""|""CLEAR""]" + + If ScriptForge.SF_Utils._ErrorHandling() Then On Local Error GoTo Catch + sRemove = "" + +Check: + If IsMissing(Columns) Or IsEmpty(Columns) Then Columns = Array(1) + If Not IsArray(Columns) Then Columns = Array(Columns) + If IsMissing(Header) Or IsEmpty(Header) Then Header = False + If IsMissing(CaseSensitive) Or IsEmpty(CaseSensitive) Then CaseSensitive = False + If IsMissing(Mode) Or IsEmpty(Mode) Then Mode = "COMPACT" + If ScriptForge.SF_Utils._EnterFunction(cstThisSub, cstSubArgs) Then + If Not _IsStillAlive(True) Then GoTo Finally + If Not ScriptForge.SF_Utils._Validate(Range, "Range", V_STRING) Then GoTo Finally + If Not ScriptForge.SF_Utils._ValidateArray(Columns, "Columns", 1, ScriptForge.V_NUMERIC, True) Then GoTo Finally + If Not ScriptForge.SF_Utils._Validate(Header, "Header", ScriptForge.V_BOOLEAN) Then GoTo Finally + If Not ScriptForge.SF_Utils._Validate(CaseSensitive, "CaseSensitive", ScriptForge.V_BOOLEAN) Then GoTo Finally + If Not ScriptForge.SF_Utils._Validate(Mode, "Mode", V_STRING, Array("COMPACT", "CLEAR")) Then GoTo Finally + End If + +Try: + ' Let's assume the initial range is "$Sheet1.$B$11:$K$110" (100 rows, 10 clumns, no header) + ' Ignore header, consider only the effective data + If Header Then Set oRangeAddress = _Offset(Range, 1, 0, Height(Range) - 1, 0) Else Set oRangeAddress = _ParseAddress(Range) + + '** Step 1: create a worksheet and copy the range in A1 + lRandom = ScriptForge.SF_Session.ExecuteCalcFunction("RANDBETWEEN.NV", 1, 999999) + sWorkSheet = "SF_WORK_" & Right("000000" & lRandom, 6) + InsertSheet(sWorkSheet) + ' sMurrorRange = "$SF_WORK.$A$1:$J$100" + sMirrorRange = CopyToCell(oRangeAddress, "$" & sWorkSheet & ".$A$1") + + '** Step 2: add a column in the mirror with the row numbers in the initial range + ' vRows = [11..110] + With oRangeAddress.XCellRange + vRows = ScriptForge.RangeInit(CLng(.RangeAddress.StartRow + 1), CLng(.RangeAddress.EndRow + 1)) + End With + ' sRowsRange = "$SF_WORK.$K$1:$K$100" + sRowsRange = SetArray(Offset(sMirrorRange, , Width(sMirrorRange), 1, 1), vRows()) + + '** Step 3: sort the mirrored data, including the row numbers column + ' sMirrorRange = "$SF_WORK.$A$1:$K$100" + sFullMirrorRange = Offset(sMirrorRange, , , , Width(sMirrorRange) + 1) + SortRange(sFullMirrorRange, SortKeys := Columns, CaseSensitive := CaseSensitive) + + '** Step 4: Filter out the row numbers containing duplicates + ' sLastRowRange = "$SF_WORK.$K$2:$K$100" + sLastRowsRange = Offset(sRowsRange, 1, , Height(sRowsRange) - 1) + ' If Columns = (1, 3) => sDuplicates = "=AND(TRUE;$A2=$A1;$C2=$C1) + sDuplicates = "=AND(TRUE" + For Each lColumn In Columns + sColumn = _GetColumnName(lColumn) + If CaseSensitive Then + sDuplicates = sDuplicates & ";$" & sColumn & "2=$" & sColumn & "1" + Else + sDuplicates = sDuplicates & ";UPPER($" & sColumn & "2)=UPPER($" & sColumn & "1)" + End If + Next lColumn + sDuplicates = sDuplicates & ")" + ClearValues(sLastRowsRange, sDuplicates, "ROW") + + '** Step 5: Compact or clear the rows in the initial range that are not retained in the final row numbers list + ' sFilter = "=ISNA(MATCH(ROW();$SF_WORK.$K$1:$K$100;0))" + sFilter = "=ISNA(MATCH(ROW();" & sRowsRange & ";0))" + Select Case UCase(Mode) + Case "COMPACT" + sRemove = CompactUp(oRangeAddress.RangeName, WholeRow := False, FilterFormula := sFilter) + If Header Then sRemove = Offset(sRemove, -1, 0, Height(sRemove) + 1) + Case "CLEAR" + ClearValues(oRangeAddress.RangeName, FilterFormula := sFilter, FilterScope := "ROW") + If Header Then sRemove = _ParseAddress(Range).RangeName Else sRemove = oRangeAddress.RangeName + End Select + + '** Housekeeping + RemoveSheet(sWorkSheet) + +Finally: + RemoveDuplicates = sRemove + ScriptForge.SF_Utils._ExitFunction(cstThisSub) + Exit Function +Catch: + GoTo Finally +End Function ' SFDocuments.SF_Calc.RemoveDuplicates + +REM ----------------------------------------------------------------------------- Public Function RemoveSheet(Optional ByVal SheetName As Variant) As Boolean ''' Remove an existing sheet from the document ''' Args: |