@@ -18,9 +18,11 @@ package difflib
1818import (
1919 "bufio"
2020 "bytes"
21+ "errors"
2122 "fmt"
2223 "io"
2324 "strings"
25+ "unicode"
2426)
2527
2628func min (a , b int ) int {
@@ -44,6 +46,14 @@ func calculateRatio(matches, length int) float64 {
4446 return 1.0
4547}
4648
// listifyString splits str into a slice holding one single-character string
// per Unicode code point, in order. An empty input yields an empty, non-nil
// slice.
func listifyString(str string) (lst []string) {
	// Ranging over a string yields byte offsets, not element positions, so
	// the result is built with append rather than indexed by the loop
	// variable; indexing by offset would leave empty-string holes after
	// every multi-byte rune. len(str) is an upper bound on the rune count.
	lst = make([]string, 0, len(str))
	for _, c := range str {
		lst = append(lst, string(c))
	}
	return lst
}
56+
4757type Match struct {
4858 A int
4959 B int
@@ -511,6 +521,227 @@ func (m *SequenceMatcher) RealQuickRatio() float64 {
511521 return calculateRatio (min (la , lb ), la + lb )
512522}
513523
// count_leading reports how many consecutive bytes equal to ch appear at the
// very beginning of line. It returns 0 for an empty line or when the first
// byte differs from ch.
func count_leading(line string, ch byte) (count int) {
	for pos := 0; pos < len(line); pos++ {
		if line[pos] != ch {
			return pos
		}
	}
	// Every byte matched (or the line is empty).
	return len(line)
}
533+
// Differ compares sequences of lines and produces a line-oriented delta in
// which each output line carries a two-character prefix ("- ", "+ ", "? "
// or two spaces).
type Differ struct {
	// Linejunk is handed to the line-level matcher built in Compare.
	// Charjunk is handed to the character-level matcher built in
	// FancyReplace. NewDiffer leaves both nil.
	Linejunk, Charjunk func(string) bool
}
538+
539+ func NewDiffer () * Differ {
540+ return & Differ {}
541+ }
542+
543+ func (d * Differ ) Compare (a []string , b []string ) (diffs []string , err error ) {
544+ // Compare two sequences of lines; generate the resulting delta.
545+
546+ // Each sequence must contain individual single-line strings ending with
547+ // newlines. Such sequences can be obtained from the `readlines()` method
548+ // of file-like objects. The delta generated also consists of newline-
549+ // terminated strings, ready to be printed as-is via the writeline()
550+ // method of a file-like object.
551+ diffs = []string {}
552+ cruncher := NewMatcherWithJunk (a , b , true , d .Linejunk )
553+ opcodes := cruncher .GetOpCodes ()
554+ for _ , current := range opcodes {
555+ alo := current .I1
556+ ahi := current .I2
557+ blo := current .J1
558+ bhi := current .J2
559+ var g []string
560+ if current .Tag == 'r' {
561+ g , _ = d .FancyReplace (a , alo , ahi , b , blo , bhi )
562+ } else if current .Tag == 'd' {
563+ g = d .Dump ("-" , a , alo , ahi )
564+ } else if current .Tag == 'i' {
565+ g = d .Dump ("+" , b , blo , bhi )
566+ } else if current .Tag == 'e' {
567+ g = d .Dump (" " , a , alo , ahi )
568+ } else {
569+ return nil , errors .New (fmt .Sprintf ("unknown tag %q" , current .Tag ))
570+ }
571+ diffs = append (diffs , g ... )
572+ }
573+ return diffs , nil
574+ }
575+
576+ func (d * Differ ) Dump (tag string , x []string , lo int , hi int ) (out []string ) {
577+ // Generate comparison results for a same-tagged range.
578+ out = []string {}
579+ for i := lo ; i < hi ; i ++ {
580+ out = append (out , fmt .Sprintf ("%s %s" , tag , x [i ]))
581+ }
582+ return out
583+ }
584+
585+ func (d * Differ ) PlainReplace (a []string , alo int , ahi int , b []string , blo int , bhi int ) (out []string , err error ) {
586+ if ! (alo < ahi ) || ! (blo < bhi ) { // assertion
587+ return nil , errors .New ("low greater than or equal to high" )
588+ }
589+ // dump the shorter block first -- reduces the burden on short-term
590+ // memory if the blocks are of very different sizes
591+ if bhi - blo < ahi - alo {
592+ out = d .Dump ("+" , b , blo , bhi )
593+ out = append (out , d .Dump ("-" , a , alo , ahi )... )
594+ } else {
595+ out = d .Dump ("-" , a , alo , ahi )
596+ out = append (out , d .Dump ("+" , b , blo , bhi )... )
597+ }
598+ return out , nil
599+ }
600+
601+ func (d * Differ ) FancyReplace (a []string , alo int , ahi int , b []string , blo int , bhi int ) (out []string , err error ) {
602+ // When replacing one block of lines with another, search the blocks
603+ // for *similar* lines; the best-matching pair (if any) is used as a
604+ // synch point, and intraline difference marking is done on the
605+ // similar pair. Lots of work, but often worth it.
606+
607+ // don't synch up unless the lines have a similarity score of at
608+ // least cutoff; best_ratio tracks the best score seen so far
609+ best_ratio := 0.74
610+ cutoff := 0.75
611+ cruncher := NewMatcherWithJunk (a , b , true , d .Charjunk )
612+ eqi := - 1 // 1st indices of equal lines (if any)
613+ eqj := - 1
614+ out = []string {}
615+
616+ // search for the pair that matches best without being identical
617+ // (identical lines must be junk lines, & we don't want to synch up
618+ // on junk -- unless we have to)
619+ var best_i , best_j int
620+ for j := blo ; j < bhi ; j ++ {
621+ bj := b [j ]
622+ cruncher .SetSeq2 (listifyString (bj ))
623+ for i := alo ; i < ahi ; i ++ {
624+ ai := a [i ]
625+ if ai == bj {
626+ if eqi == - 1 {
627+ eqi = i
628+ eqj = j
629+ }
630+ continue
631+ }
632+ cruncher .SetSeq1 (listifyString (ai ))
633+ // computing similarity is expensive, so use the quick
634+ // upper bounds first -- have seen this speed up messy
635+ // compares by a factor of 3.
636+ // note that ratio() is only expensive to compute the first
637+ // time it's called on a sequence pair; the expensive part
638+ // of the computation is cached by cruncher
639+ if cruncher .RealQuickRatio () > best_ratio &&
640+ cruncher .QuickRatio () > best_ratio &&
641+ cruncher .Ratio () > best_ratio {
642+ best_ratio = cruncher .Ratio ()
643+ best_i = i
644+ best_j = j
645+ }
646+ }
647+ }
648+ if best_ratio < cutoff {
649+ // no non-identical "pretty close" pair
650+ if eqi == - 1 {
651+ // no identical pair either -- treat it as a straight replace
652+ out , _ = d .PlainReplace (a , alo , ahi , b , blo , bhi )
653+ return out , nil
654+ }
655+ // no close pair, but an identical pair -- synch up on that
656+ best_i = eqi
657+ best_j = eqj
658+ best_ratio = 1.0
659+ } else {
660+ // there's a close pair, so forget the identical pair (if any)
661+ eqi = - 1
662+ }
663+ // a[best_i] very similar to b[best_j]; eqi is None iff they're not
664+ // identical
665+
666+ // pump out diffs from before the synch point
667+ out = append (out , d .fancyHelper (a , alo , best_i , b , blo , best_j )... )
668+
669+ // do intraline marking on the synch pair
670+ aelt , belt := a [best_i ], b [best_j ]
671+ if eqi == - 1 {
672+ // pump out a '-', '?', '+', '?' quad for the synched lines
673+ var atags , btags string
674+ cruncher .SetSeqs (listifyString (aelt ), listifyString (belt ))
675+ opcodes := cruncher .GetOpCodes ()
676+ for _ , current := range opcodes {
677+ ai1 := current .I1
678+ ai2 := current .I2
679+ bj1 := current .J1
680+ bj2 := current .J2
681+ la , lb := ai2 - ai1 , bj2 - bj1
682+ if current .Tag == 'r' {
683+ atags += strings .Repeat ("^" , la )
684+ btags += strings .Repeat ("^" , lb )
685+ } else if current .Tag == 'd' {
686+ atags += strings .Repeat ("-" , la )
687+ } else if current .Tag == 'i' {
688+ btags += strings .Repeat ("+" , lb )
689+ } else if current .Tag == 'e' {
690+ atags += strings .Repeat (" " , la )
691+ btags += strings .Repeat (" " , lb )
692+ } else {
693+ return nil , errors .New (fmt .Sprintf ("unknown tag %q" ,
694+ current .Tag ))
695+ }
696+ }
697+ out = append (out , d .QFormat (aelt , belt , atags , btags )... )
698+ } else {
699+ // the synch pair is identical
700+ out = append (out , " " + aelt )
701+ }
702+ // pump out diffs from after the synch point
703+ out = append (out , d .fancyHelper (a , best_i + 1 , ahi , b , best_j + 1 , bhi )... )
704+ return out , nil
705+ }
706+
707+ func (d * Differ ) fancyHelper (a []string , alo int , ahi int , b []string , blo int , bhi int ) (out []string ) {
708+ if alo < ahi {
709+ if blo < bhi {
710+ out , _ = d .FancyReplace (a , alo , ahi , b , blo , bhi )
711+ } else {
712+ out = d .Dump ("-" , a , alo , ahi )
713+ }
714+ } else if blo < bhi {
715+ out = d .Dump ("+" , b , blo , bhi )
716+ } else {
717+ out = []string {}
718+ }
719+ return out
720+ }
721+
722+ func (d * Differ ) QFormat (aline string , bline string , atags string , btags string ) (out []string ) {
723+ // Format "?" output and deal with leading tabs.
724+
725+ // Can hurt, but will probably help most of the time.
726+ common := min (count_leading (aline , '\t' ), count_leading (bline , '\t' ))
727+ common = min (common , count_leading (atags [:common ], ' ' ))
728+ common = min (common , count_leading (btags [:common ], ' ' ))
729+ atags = strings .TrimRightFunc (atags [common :], unicode .IsSpace )
730+ btags = strings .TrimRightFunc (btags [common :], unicode .IsSpace )
731+
732+ out = []string {"- " + aline }
733+ if len (atags ) > 0 {
734+ out = append (out , fmt .Sprintf ("? %s%s\n " ,
735+ strings .Repeat ("\t " , common ), atags ))
736+ }
737+ out = append (out , "+ " + bline )
738+ if len (btags ) > 0 {
739+ out = append (out , fmt .Sprintf ("? %s%s\n " ,
740+ strings .Repeat ("\t " , common ), btags ))
741+ }
742+ return out
743+ }
744+
514745// Convert range to the "ed" format
515746func formatRangeUnified (start , stop int ) string {
516747 // Per the diff spec at http://www.unix.org/single_unix_specification/
0 commit comments