# Source: CodeGeeX4/langchain_demo/vectorize.py (25 lines, 845 B, Python)
# Page timestamp: 2024-07-05 01:33:53 +00:00
"""
Vectorize your local project
"""
import argparse
from utils.data import traverse
from utils.vector import vectorize
def parse_arguments(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the vectorization script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default), argparse
            falls back to ``sys.argv[1:]``, so existing callers that use
            ``parse_arguments()`` are unaffected.

    Returns:
        The parsed namespace with the attributes ``workspace``,
        ``chunk_size``, ``overlap_size``, ``batch_size`` and
        ``output_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--workspace',
        type=str,
        help="directory of the workspace to be vectorized",
        default='.',
    )
    parser.add_argument(
        '--chunk_size',
        type=int,
        help="chunk size when splitting",
        default=512,
    )
    parser.add_argument(
        '--overlap_size',
        type=int,
        help="chunk overlap when splitting",
        default=32,
    )
    parser.add_argument(
        '--batch_size',
        type=int,
        help="embedding batch size",
        default=16,
    )
    parser.add_argument(
        '--output_path',
        type=str,
        help="path to save the vectors",
        default='vectors',
    )
    # Forwarding argv lets tests and other modules drive the parser
    # without mutating sys.argv.
    return parser.parse_args(argv)
if __name__ == '__main__':
    # Script entry point: read the CLI options, gather the workspace
    # contents, then hand everything to the vectorizer.
    args = parse_arguments()
    # traverse() is given the workspace directory from --workspace;
    # presumably it returns the files to embed -- confirm in utils.data.
    files = traverse(args.workspace)
    # The whole namespace is passed through, so vectorize() has access to
    # chunk_size, overlap_size, batch_size and output_path.
    vectorize(files, args)