


Graccvs组件可智能分析并提取各种文件中的文本,是为自然语言信息检索、机器学习等高端技术提供底层支持的技术组件,可为Lucene/CLucene、Elasticsearch、Sphinx等全文检索工具,以及OA、ERP、CRM、网盘、文件管理等其他系统提供文件摘要及搜索前置服务。同时也可以为安全网关、邮件内容监控、内网安全等系统提供文件搜索及监控服务的底层技术支持。

组件支持常见的各种文件格式:".pdf", ".doc", ".odt", ".docx", ".dotm", ".docm", ".wps", ".xls", ".xlsx", ".xlsm", ".xltm", ".et", ".ppt", ".pptx", ".potm", ".pptm", ".ppsm", ".dps", ".ofd"(电子发票版式文件), ".rtf", ".html", ".htm", ".mht", ".mhtml", ".eml", ".emmx", ".xmind", ".gmind", ".chm", ".zip" 等。

Graccvs组件是以xcframework格式提供的,点击这里下载。同时网站提供Xcode工程示例说明,点击这里查看说明。


3: 在主单元(示例中为:ViewController.h)中增加头文件,并定义GraccvsGraccvsLib类型的属性,如下:
    @property (strong, nonatomic) GraccvsGraccvsLib *hx;
7: 完成文件提取任务后调用unload函数,释放组件使用的资源。

主要代码单元函数定义 ViewController.h:

//  ViewController.h
//  libTest
//  Created by graccvs on 2021/10/31.
#import <UIKit/UIKit.h>

// Forward declaration keeps this header independent of the Graccvs framework
// headers; the implementation file must import the framework itself.
@class GraccvsGraccvsLib;

/// Demo view controller that drives the Graccvs text-extraction component and
/// shows the extracted text (or an error message) in a text view.
@interface ViewController : UIViewController

/// Extraction engine instance used by all actions of this controller.
@property (strong, nonatomic) GraccvsGraccvsLib *hx;

/// Button outlets; presumably wired in the storyboard to the actions with the
/// matching names below -- confirm against the nib.
@property (nonatomic, strong) IBOutlet UIButton *btToString;
@property (nonatomic, strong) IBOutlet UIButton *btToText;
@property (nonatomic, strong) IBOutlet UIButton *btHttpToString;
@property (nonatomic, strong) IBOutlet UIButton *btHttpToText;

/// Text view that receives the extracted text or the error description.
@property (nonatomic, strong) IBOutlet UITextView *textView1;

/// Presents a modal alert with a single confirmation button.
/// @param msg Message shown in the alert body.
- (void)alertX:(NSString *)msg;

/// Extracts the text of a bundled sample file and shows it as a string.
- (IBAction)toString:(id)obj;

/// Extracts the text of a bundled sample file into a text file and shows it.
- (IBAction)toText:(id)obj;

/// Downloads a file over HTTP(S), extracts its text and shows it as a string.
- (IBAction)httpToString:(id)obj;

/// Downloads a file over HTTP(S), extracts its text into a text file and shows it.
- (IBAction)httpToTextFile:(id)obj;

@end


//  ViewController.m
//  libTest
//  Created by graccvs on 2021/10/31.
#import "ViewController.h"
// Private class extension; intentionally empty. It must be closed with @end,
// otherwise the C function that follows is parsed as part of the interface.
@interface ViewController ()
@end
/// Returns the user-facing message for a Graccvs result code.
///
/// @param code Result code returned by the extraction calls (0 means success).
/// @return The message for codes 0-9; any other value yields the generic
///         fallback message.
NSString *errorDesc(int32_t code) {
    switch (code) {
        case 0:
            return @"ok";
        case 1:
            return @"未知错误";
        case 2:
            return @"提取源文件不存在";
        case 3:
            return @"保存目标文件失败";
        case 4:
            return @"提取的源文件超出设置的大小范围";
        case 5:
            return @"不支持的提取文件格式";
        case 6:
            return @"得到接口失败";
        case 7:
            return @"HTTP下载文件失败";
        case 8:
            return @"HTTP文件为空";
        case 9:
            return @"软件许可错误";
        default:
            // Codes outside the documented range.
            return @"未知错误2";
    }
}
@implementation ViewController

#pragma mark - Lifecycle

// Creates the extraction engine, hands it a writable working directory and
// registers the license.
- (void)viewDidLoad {
    [super viewDidLoad];

    // The engine needs a folder it can write to; the app's Documents directory is used.
    NSArray<NSString *> *paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
    NSString *temp = paths.firstObject;

    self.hx = [[GraccvsGraccvsLib alloc] init];
    [self.hx load:temp];

    // License text is empty in this sample.
    // NOTE(review): the int32_t result of -auth:licText: is ignored here, as in the original sample.
    NSString *sn = @"";
    [self.hx auth:@"Beij Gaya" licText:sn];
}

// Releases the engine's resources. -viewDidUnload is deprecated and is never
// called on iOS 6 and later, so the unload call lives in -dealloc instead.
- (void)dealloc {
    [_hx unload];
}

#pragma mark - Helpers

// Shows a modal alert with a single confirmation button.
- (void)alertX:(NSString *)msg {
    UIAlertController *alert = [UIAlertController alertControllerWithTitle:@"提示" message:msg preferredStyle:UIAlertControllerStyleAlert];
    [alert addAction:[UIAlertAction actionWithTitle:@"确定" style:UIAlertActionStyleDefault handler:nil]];
    // Present the dialog.
    [self presentViewController:alert animated:YES completion:nil];
}

// Returns the full path of `fileName` inside the app's Documents directory.
- (NSString *)documentsPathForFile:(NSString *)fileName {
    NSArray<NSString *> *paths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
    return [paths.firstObject stringByAppendingPathComponent:fileName];
}

// Shows a string result. A result starting with "@ErrCode" marks a failure,
// in which case the engine's last error text is shown instead.
- (void)showExtractedString:(NSString *)result {
    if (![result hasPrefix:@"@ErrCode"]) {
        self.textView1.text = result;
    } else {
        self.textView1.text = [self.hx lastErr];
    }
}

// Shows the content of the output file when `code` is 0, otherwise the message
// for the error code ([self.hx lastErr] could be used as well).
- (void)showOutputFile:(NSString *)path forResultCode:(int32_t)code {
    if (code != 0) {
        self.textView1.text = errorDesc(code);
        return;
    }
    NSError *readError = nil;
    NSString *txtData = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&readError];
    // Surface a read failure instead of silently clearing the text view.
    self.textView1.text = txtData ?: readError.localizedDescription;
}

#pragma mark - Actions

// ------------------------ Extract text, return it as a string ------------------------
- (IBAction)toString:(id)obj {
    NSBundle *bundle = [NSBundle bundleWithPath:[[NSBundle mainBundle] pathForResource:@"testFiles" ofType:@"bundle"]];
    NSString *fn = [bundle pathForResource:@"Adobe Intro" ofType:@".ofd"];
    if (![[NSFileManager defaultManager] fileExistsAtPath:fn]) {
        [self alertX:@"file not exists!"];
        // Nothing to extract; do not hand a missing file to the engine.
        return;
    }
    [self showExtractedString:[self.hx toString:fn]];
}

// ------------------------ Extract text and save it as a text file ------------------------
- (IBAction)toText:(id)obj {
    NSBundle *bundle = [NSBundle bundleWithPath:[[NSBundle mainBundle] pathForResource:@"testFiles" ofType:@"bundle"]];
    NSString *fn = [bundle pathForResource:@"简可信模板OCR识别工具帮助" ofType:@".docx"];
    NSString *outTextFile = [self documentsPathForFile:@"out001.txt"];
    int32_t code = [self.hx toTextFile:fn outFile:outTextFile];
    [self showOutputFile:outTextFile forResultCode:code];
}

// ------------------------ HTTP: extract text, return it as a string ------------------------
- (IBAction)httpToString:(id)obj {
    NSString *url = @"https://www.gaya-soft.cn/dfs/v2/graccvs文件正文提取接口.pdf";
    NSString *params2  = @"{\"headers\":[{\"client_id\": \"g01x9\"}, {\"client_secret\": \"e23c89cc9fe\"}], \"cookies\":[{\"name\": \"ga\", \"value\": \"1020\", \"expires\":36000000, \"path\": \"/\"}]}";
    // Timeout is given in milliseconds.
    // NOTE(review): this call runs on the main thread and may block the UI for
    // up to the timeout -- consider moving it to a background queue once the
    // library's threading guarantees are confirmed.
    NSString *s = [self.hx httpToString:url fileExt:@".pdf" timeout:180*1000 params:params2];
    [self showExtractedString:s];
}

// ------------------------ HTTP: extract text and save it as a text file ------------------------
- (IBAction)httpToTextFile:(id)obj {
    NSString *url = @"https://www.gaya-soft.cn/dfs/v2/简可信模板OCR识别工具帮助.docx";
    NSString *outFile = [self documentsPathForFile:@"out002.txt"];
    // Timeout is given in milliseconds.
    // NOTE(review): the meaning of a timeout of 0 is not documented here -- confirm.
    int32_t code = [self.hx httpToTextFile:url fileExt:@".docx" outTxtFile:outFile timeout: 0  params:@""];
    [self showOutputFile:outFile forResultCode:code];
}

@end

函数定义 Graccvs.xcframework\ios-arm64\Graccvs.framework\Versions\A\Headers\Graccvs.objc.h:

// Objective-C API for talking to graccvs Go package.
//   gobind -lang=objc graccvs
// File is generated by gobind. Do not edit.
#ifndef __Graccvs_H__
#define __Graccvs_H__
@import Foundation;
#include "ref.h"
#include "Universe.objc.h"
@class GraccvsGraccvsLib;
// NOTE(review): gobind normally emits a `<goSeqRefInterface>` protocol list on
// this interface; it may have been stripped from this listing -- confirm
// against the header shipped inside the xcframework.
@interface GraccvsGraccvsLib : NSObject  {
}
@property(strong, readonly) _Nonnull id _ref;

- (nonnull instancetype)initWithRef:(_Nonnull id)ref;
- (nonnull instancetype)init;
/**
 * soft register
 */
- (int32_t)auth:(NSString* _Nullable)corp licText:(NSString* _Nullable)licText;
/**
 * get http file and extract the text
"url" http/https url of file
"fileExt" extract text as this suffix
"timeout" millisecond, timeout parameter of get http file,
"params" is http Headers and cookies
 */
- (NSString* _Nonnull)httpToString:(NSString* _Nullable)url fileExt:(NSString* _Nullable)fileExt timeout:(int32_t)timeout params:(NSString* _Nullable)params;
/**
 * get http file, extract the text and save to file
"url" http/https url of file
"fileExt" extract text as this suffix
"outTxtFile" output filename
"timeout" millisecond, timeout parameter of get http file,
"params" is http Headers and cookies
 */
- (int32_t)httpToTextFile:(NSString* _Nullable)url fileExt:(NSString* _Nullable)fileExt outTxtFile:(NSString* _Nullable)outTxtFile timeout:(int32_t)timeout params:(NSString* _Nullable)params;
/**
 * last error
 */
- (NSString* _Nonnull)lastErr;
/**
 * dynamic library init
"tempdir" is temporary folder with write right
 */
- (void)load:(NSString* _Nullable)tempdir;
- (void)logTest:(NSString* _Nullable)inFile text:(NSString* _Nullable)text;
/**
 * extract file text and return string
 */
- (NSString* _Nonnull)toString:(NSString* _Nullable)inFile;
/**
 * extract file text and save to text file
"inFile" is input filename, "outFile" is output filename
 */
- (int32_t)toTextFile:(NSString* _Nullable)inFile outFile:(NSString* _Nullable)outFile;
/**
 * call this function before exit
 */
- (void)unload;
@end

#endif





原文地址: https://www.outofmemory.cn/web/993863.html

打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-05-21
下一篇 2022-05-21



