vikunja-api/vendor/github.com/client9/misspell/replace.go
2018-10-28 17:24:10 +01:00

246 lines
5.9 KiB
Go

package misspell
import (
"bufio"
"bytes"
"io"
"regexp"
"strings"
"text/scanner"
)
func max(x, y int) int {
if x > y {
return x
}
return y
}
func inArray(haystack []string, needle string) bool {
for _, word := range haystack {
if needle == word {
return true
}
}
return false
}
var wordRegexp = regexp.MustCompile(`[a-zA-Z0-9']+`)
// Diff is datastructure showing what changed in a single line
type Diff struct {
Filename string
FullLine string
Line int
Column int
Original string
Corrected string
}
// Replacer is the main struct for spelling correction
type Replacer struct {
Replacements []string
Debug bool
engine *StringReplacer
corrected map[string]string
}
// New creates a new default Replacer using the main rule list
func New() *Replacer {
r := Replacer{
Replacements: DictMain,
}
r.Compile()
return &r
}
// RemoveRule deletes existings rules.
// TODO: make inplace to save memory
func (r *Replacer) RemoveRule(ignore []string) {
newwords := make([]string, 0, len(r.Replacements))
for i := 0; i < len(r.Replacements); i += 2 {
if inArray(ignore, r.Replacements[i]) {
continue
}
newwords = append(newwords, r.Replacements[i:i+2]...)
}
r.engine = nil
r.Replacements = newwords
}
// AddRuleList appends new rules.
// Input is in the same form as Strings.Replacer: [ old1, new1, old2, new2, ....]
// Note: does not check for duplictes
func (r *Replacer) AddRuleList(additions []string) {
r.engine = nil
r.Replacements = append(r.Replacements, additions...)
}
// Compile compiles the rules. Required before using the Replace functions
func (r *Replacer) Compile() {
r.corrected = make(map[string]string, len(r.Replacements)/2)
for i := 0; i < len(r.Replacements); i += 2 {
r.corrected[r.Replacements[i]] = r.Replacements[i+1]
}
r.engine = NewStringReplacer(r.Replacements...)
}
/*
line1 and line2 are different
extract words from each line1
replace word -> newword
if word == new-word
continue
if new-word in list of replacements
continue
new word not original, and not in list of replacements
some substring got mixed up. UNdo
*/
func (r *Replacer) recheckLine(s string, lineNum int, buf io.Writer, next func(Diff)) {
first := 0
redacted := RemoveNotWords(s)
idx := wordRegexp.FindAllStringIndex(redacted, -1)
for _, ab := range idx {
word := s[ab[0]:ab[1]]
newword := r.engine.Replace(word)
if newword == word {
// no replacement done
continue
}
// ignore camelCase words
// https://github.com/client9/misspell/issues/113
if CaseStyle(word) == CaseUnknown {
continue
}
if StringEqualFold(r.corrected[strings.ToLower(word)], newword) {
// word got corrected into something we know
io.WriteString(buf, s[first:ab[0]])
io.WriteString(buf, newword)
first = ab[1]
next(Diff{
FullLine: s,
Line: lineNum,
Original: word,
Corrected: newword,
Column: ab[0],
})
continue
}
// Word got corrected into something unknown. Ignore it
}
io.WriteString(buf, s[first:])
}
// ReplaceGo is a specialized routine for correcting Golang source
// files. Currently only checks comments, not identifiers for
// spelling.
func (r *Replacer) ReplaceGo(input string) (string, []Diff) {
var s scanner.Scanner
s.Init(strings.NewReader(input))
s.Mode = scanner.ScanIdents | scanner.ScanFloats | scanner.ScanChars | scanner.ScanStrings | scanner.ScanRawStrings | scanner.ScanComments
lastPos := 0
output := ""
Loop:
for {
switch s.Scan() {
case scanner.Comment:
origComment := s.TokenText()
newComment := r.engine.Replace(origComment)
if origComment != newComment {
// s.Pos().Offset is the end of the current token
// subtract len(origComment) to get the start of the token
offset := s.Pos().Offset
output = output + input[lastPos:offset-len(origComment)] + newComment
lastPos = offset
}
case scanner.EOF:
break Loop
}
}
if lastPos == 0 {
// no changes, no copies
return input, nil
}
if lastPos < len(input) {
output = output + input[lastPos:]
}
diffs := make([]Diff, 0, 8)
buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100))
// faster that making a bytes.Buffer and bufio.ReadString
outlines := strings.SplitAfter(output, "\n")
inlines := strings.SplitAfter(input, "\n")
for i := 0; i < len(inlines); i++ {
if inlines[i] == outlines[i] {
buf.WriteString(outlines[i])
continue
}
r.recheckLine(inlines[i], i+1, buf, func(d Diff) {
diffs = append(diffs, d)
})
}
return buf.String(), diffs
}
// Replace is corrects misspellings in input, returning corrected version
// along with a list of diffs.
func (r *Replacer) Replace(input string) (string, []Diff) {
output := r.engine.Replace(input)
if input == output {
return input, nil
}
diffs := make([]Diff, 0, 8)
buf := bytes.NewBuffer(make([]byte, 0, max(len(input), len(output))+100))
// faster that making a bytes.Buffer and bufio.ReadString
outlines := strings.SplitAfter(output, "\n")
inlines := strings.SplitAfter(input, "\n")
for i := 0; i < len(inlines); i++ {
if inlines[i] == outlines[i] {
buf.WriteString(outlines[i])
continue
}
r.recheckLine(inlines[i], i+1, buf, func(d Diff) {
diffs = append(diffs, d)
})
}
return buf.String(), diffs
}
// ReplaceReader applies spelling corrections to a reader stream. Diffs are
// emitted through a callback.
func (r *Replacer) ReplaceReader(raw io.Reader, w io.Writer, next func(Diff)) error {
var (
err error
line string
lineNum int
)
reader := bufio.NewReader(raw)
for err == nil {
lineNum++
line, err = reader.ReadString('\n')
// if it's EOF, then line has the last line
// don't like the check of err here and
// in for loop
if err != nil && err != io.EOF {
return err
}
// easily 5x faster than regexp+map
if line == r.engine.Replace(line) {
io.WriteString(w, line)
continue
}
// but it can be inaccurate, so we need to double check
r.recheckLine(line, lineNum, w, next)
}
return nil
}