⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 diffengine.vb

📁 an lcs algorithm for finding common string
💻 VB
字号:
Namespace We.See.Diff

    ''' <summary>
    ''' We See Diff uses the LCS algorithm to determine the differences between two arbitrary lists
    ''' the list of differences will later be translated to a list of modifications
    ''' </summary>
    ''' <remarks></remarks>
    Public Class DiffEngine(Of T)

        ' A possible extension of this class is making the index process optional
        ' this would speed up performance in very small collections that are very different
        ' for now, this is assumed to make so small a difference that the added complexity
        ' weighs more than the gained performance

        ''' <summary>
        ''' This function compares two list of type T
        ''' and determines the modifications required on list1 to change it to list2
        ''' it is guaranteed to return a DiffScript class, with size 0 when both lists are equal
        ''' </summary>
        ''' <param name="list1"></param>
        ''' <param name="list2"></param>
        ''' <returns>a DiffScript containing modification descriptions</returns>
        ''' <remarks></remarks>
        Public Shared Function MakeDiffScript(ByVal list1 As T(), ByVal list2 As T()) As DiffScript(Of T)

            Dim dataIndex As Index(Of T)

            ' index the lists
            dataIndex = Index(Of T).Build(list1, list2)

            Dim result As DiffScript(Of T)

            ' when both lists are equal, return an empty diffscript
            If dataIndex.ListsEqaul Then
                result = New DiffScript(Of T)
            Else

                result = FindLCS(dataIndex)

                ' the diffscript created by FindLCS only contains add and remove
                ' this is translated to add, remove, replace and move by calling Interpret
                'result.Interpret()

            End If

            Return result
        End Function

        Private Shared Function FindLCS(ByVal index As Index(Of T)) As DiffScript(Of T)

            Dim s As Integer = index.StartSize

            Dim list1 As Integer() = index.Index1
            Dim list2 As Integer() = index.Index2

            Dim length1 As Integer = index.Block1Length
            Dim length2 As Integer = index.Block2Length

            Dim lcsgraph(,) As Integer = New Integer(length1 + 1, length2 + 1) {}
            Dim i, j As Integer

            ' Build the LCS graph
            ' this creates a 2 dimentional array of the gain of following a route in that array
            For i = 1 To length1
                For j = 1 To length2
                    If list1(s + i - 1) = list2(s + j - 1) Then
                        lcsgraph(i, j) = lcsgraph(i - 1, j - 1) + 1
                    Else
                        lcsgraph(i, j) = Math.Max(lcsgraph(i - 1, j - 1), _
                                         Math.Max(lcsgraph(i - 1, j), lcsgraph(i, j - 1)))
                    End If
                Next
            Next

            ' when the array is completely filled, the route with most copy actions (longest consecutive sequence or lcs)
            ' is identified backtracking the route which results in the highest value
            i = length1
            j = length2
            Dim result As New DiffScript(Of T)
            'Dim subseq As String = ""
            Dim favorAdd As Boolean
            ' this algorithm is designed to prepare for detection of replacements
            ' if we keep the original alghorithm, we end up with 
            ' ---+++ when 3 characters are replaced. the favorAdd boolean
            ' instructs the backtrack that when we've had a - operation, and we can
            ' perform a + operation right after, at the same cost as another - operation,
            ' favor the + operation, to perform the - operation after that again
            ' this simplefies detection of replacements as they will appear in the form of
            ' -+-+ instead
            ' it still favors - as the first modification in a list
            Do While (i > 0 Or j > 0)
                If i > 0 AndAlso j > 0 AndAlso list1(s + i - 1) = list2(s + j - 1) Then
                    i -= 1
                    j -= 1
                    'subseq = " " & index.UniqueValues(list1(s + i)).ToString & vbCrLf & subseq
                    favorAdd = False
                Else
                    If Not favorAdd Then
                        If i > 0 AndAlso (j = 0 OrElse lcsgraph(i - 1, j) >= lcsgraph(i, j)) Then
                            i -= 1
                            'subseq = "-" & index.UniqueValues(list1(s + i)).ToString & vbCrLf & subseq
                            'result.Add(New DiffScript.Modification(DiffScript.Action.Remove, index.UniqueValues(list1(s + i)).ToString, s + i, 0))
                            result.Add(New DiffScript(Of T).Modification(DiffScript(Of T).Operation.Remove, index.UniqueValues(list1(s + i)), s + i))
                            favorAdd = True
                        Else
                            j -= 1
                            'subseq = "+" & index.UniqueValues(list2(s + j)).ToString & vbCrLf & subseq
                            'result.Add(New DiffScript.Modification(DiffScript.Action.Add, index.UniqueValues(list2(s + j)).ToString, 0, s + j))
                            result.Add(New DiffScript(Of T).Modification(DiffScript(Of T).Operation.Add, index.UniqueValues(list2(s + j)), s + i))
                        End If
                    Else
                        If j > 0 AndAlso (i = 0 OrElse lcsgraph(i, j - 1) >= lcsgraph(i, j)) Then
                            j -= 1
                            result.Add(New DiffScript(Of T).Modification(DiffScript(Of T).Operation.Add, index.UniqueValues(list2(s + j)), s + i))
                        Else
                            i -= 1
                            result.Add(New DiffScript(Of T).Modification(DiffScript(Of T).Operation.Remove, index.UniqueValues(list1(s + i)), s + i))
                        End If
                        favorAdd = False
                    End If
                End If
            Loop

            Return result
        End Function

    End Class

End Namespace

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -