https://github.com/xiaojiong/scanfile
演示站点: http://www.weigongkai.com/ 7G数据 2s完成扫描package scanfile/*#include#include #include int IndexStr(char *haystack, char *needle,unsigned int begin) { char *p = strstr(haystack+begin, needle); if (p) return p - haystack; return -1;}int IndexChar(char *haystack, char c,unsigned int begin) { char *p = haystack = haystack + begin; while(*p != '') { if(*p == c) { return p - haystack;}++p;} return -1;}int LastIndexChar(char *haystack, char c,unsigned int begin) { int len = strlen(haystack); if(begin > 0) { if (begin > len) { return -1;} } else { begin = len - 1;} haystack +=begin; while(1) { if(*haystack == c) { return begin;} if(begin == 0) { return -1;}--haystack;--begin;} return -1;}*/import"C"import"unsafe"func strScan(str *string, key *string, counter *Counter) []string { begin := 0 CStr := C.CString(*str) Ckey := C.CString(*key) defer func() {C.free(unsafe.Pointer(CStr))C.free(unsafe.Pointer(Ckey))}() var res []string for { var index int = 0 if index = int(C.IndexStr(CStr, Ckey, C.uint(begin))); index == -1 {break} var startIndex int = 0 if index > 0 { if pos := int(C.LastIndexChar(CStr, 'n', C.uint(index))); pos != -1 { startIndex = pos + 1}} var endIndex int = len(*str) if pos := int(C.IndexChar(CStr, 'n', C.uint(index))); pos != -1 { endIndex = pos + index} begin = endIndex if counter.IsMax() {break} res = append(res, (*str)[startIndex:endIndex])counter.Add() if begin == len(*str) {break}} return res}
package scanfile

import (
	"bytes"
	"io"
	"os"
	"sync"
)

// LineFeed is the byte that marks the end of a line of text.
var LineFeed = byte('\n')

// BufSize is the size, in bytes, of the buffer used to read each file chunk.
var BufSize = 1024 * 1024

// Scan searches every file in files for lines containing *searchStr and
// returns the combined result as produced by ScanResult.ToJson. Files are
// scanned concurrently, one goroutine per file, and all of them share a single
// counter created with InitCounter(10).
// NOTE(review): the argument 10 is presumably the maximum number of matches
// to collect; confirm against the Counter implementation.
func Scan(files []string, searchStr *string) string {
	var result ScanResult

	counter := InitCounter(10)
	// Channel on which each ScanFile goroutine delivers its per-file result.
	out := make(chan *FileRes, 10)

	for _, name := range files {
		go ScanFile(name, searchStr, counter, out)
	}
	// Exactly one result arrives per file, so receive len(files) times.
	for range files {
		result.AddFileRes(<-out)
	}

	result.AddCounter(counter)
	return result.ToJson()
}

// ScanFile scans a single file for lines containing *searchStr and sends the
// collected FileRes on out exactly once, after all of its workers have
// finished. The file is read in chunks by fileRead and the chunks are searched
// by three worker goroutines.
// NOTE(review): fileRes.Add is called from several goroutines at once; confirm
// that FileRes synchronises internally.
func ScanFile(fileName string, searchStr *string, counter *Counter, out chan *FileRes) {
	chunks := fileRead(fileName, counter)
	fileRes := InitFileRes(fileName)

	// The WaitGroup makes sure every worker has exited before the result is
	// published, so no goroutine is left behind.
	var wg sync.WaitGroup
	for w := 0; w < 3; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for text := range chunks {
				if counter.IsMax() {
					// Enough matches collected: drain the remaining chunks so
					// the reader goroutine is never blocked on a send.
					clearFileContentChan(chunks)
					return
				}
				for _, line := range strScan(text, searchStr, counter) {
					fileRes.Add(line)
				}
			}
		}()
	}
	wg.Wait()

	out <- fileRes
}

// clearFileContentChan discards everything sent on c until c is closed.
func clearFileContentChan(c chan *string) {
	for range c {
	}
}

// fileRead opens fileName and streams its content over the returned channel
// in chunks of at most BufSize bytes. Every chunk ends on a line boundary
// where possible; the channel is closed once the file is exhausted or
// counter.IsMax reports true.
//
// A chunk that contains no line feed at all (a line longer than BufSize, or a
// final line without a trailing line feed) is sent as-is so that the reader
// always makes progress. A line longer than BufSize is therefore split across
// chunks.
//
// NOTE(review): I/O errors panic inside the reader goroutine, as in the
// original code; callers cannot recover from them.
func fileRead(fileName string, counter *Counter) chan *string {
	fileContentChan := make(chan *string, 5)

	go func() {
		defer close(fileContentChan)

		fh, err := os.Open(fileName)
		if err != nil {
			panic(err)
		}
		defer fh.Close()

		buf := make([]byte, BufSize)
		var start int64 // file offset of the next unread byte

		// Stop reading once the counter has reached its maximum.
		for !counter.IsMax() {
			// ReadFull only returns a short chunk at end of file, which is
			// reported as io.EOF (nothing read) or io.ErrUnexpectedEOF.
			n, err := io.ReadFull(fh, buf)
			if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
				panic(err)
			}
			if n == 0 {
				break
			}

			// Only the first n bytes are valid; the rest of buf still holds
			// data from an earlier read.
			chunk := buf[:n]
			l := lastByteIndex(chunk, LineFeed)
			if l == -1 {
				// No line boundary in this chunk: hand over all of it rather
				// than re-reading the same bytes forever.
				l = n - 1
			}

			content := string(chunk[:l+1])
			start += int64(l + 1)
			// Rewind to just after the last complete line so the partial line
			// at the end of the chunk is re-read with the next chunk.
			if _, err := fh.Seek(start, io.SeekStart); err != nil {
				panic(err)
			}
			fileContentChan <- &content
		}
	}()

	return fileContentChan
}

// lastByteIndex returns the index of the last occurrence of sep in s, or -1
// if sep is not present.
func lastByteIndex(s []byte, sep byte) int {
	return bytes.LastIndexByte(s, sep)
}