diff --git a/feature_extractor.py b/feature_extractor.py
new file mode 100644
--- /dev/null
+++ b/feature_extractor.py
@@ -0,0 +1,207 @@
+import csv
+import os
+import sys
+import cv2
+import subprocess
+import re
+import math
+import numpy
+import numpy as np
+
+import h5py
+
+# In OpenCV3.X, this is available as cv2.CAP_PROP_POS_MSEC
+# In OpenCV2.X, this is available as cv2.cv.CV_CAP_PROP_POS_MSEC
+CAP_PROP_POS_MSEC = 0
+
+
+class Extractor:
+    """Splits a video into fixed-length segments and extracts, per segment,
+    the raw RGB frames and/or the mean of frame-level feature vectors.
+
+    The frame-level feature model is supplied by the caller as
+    ``feature_extractor``; it must expose ``extract_rgb_frame_features(frame)``
+    returning a 1-D float32 numpy array (e.g. the YouTube-8M Inception
+    extractor).  NOTE(review): the original code read ``self.extractor``
+    without ever assigning it, so every feature method raised AttributeError.
+    """
+
+    def __init__(self, video_file, segment_len, frame_rate, feature_extractor=None,
+                 output_dir="/home/khawar/Documents/AutoEncoder/Samran_Code/"):
+        self.video_path = video_file
+        self.segment_len = segment_len  # frames per segment (at frame_rate fps)
+        self.frame_rate = frame_rate    # frames sampled per second of video
+        self.extractor = feature_extractor
+        self.output_dir = output_dir    # where write_h5() stores its output
+
+    def __calc_frames(self):
+        """Total number of frames sampled at ``frame_rate``, using the
+        video duration reported by ffmpeg."""
+        process = subprocess.Popen(['ffmpeg', '-i', self.video_path],
+                                   stdout=subprocess.PIPE,
+                                   stderr=subprocess.STDOUT)
+        stdout, _ = process.communicate()
+        # ffmpeg reports e.g. "Duration: 00:01:23.45," on its diagnostics.
+        matches = re.search(
+            r"Duration:\s(?P<hours>\d+?):(?P<minutes>\d+?):(?P<seconds>\d+\.\d+?),",
+            stdout.decode('utf-8'), re.DOTALL).groupdict()
+        video_len = ((int(matches['hours']) * 3600)
+                     + (int(matches['minutes']) * 60)
+                     + float(matches['seconds']))
+        return int(video_len * self.frame_rate)
+
+    def __calc_segments(self, total_frames):
+        """Number of segments needed to cover ``total_frames`` frames."""
+        return math.ceil(total_frames / (self.frame_rate * self.segment_len))
+
+    def __frame_iterator(self, max_prev_frames, every_ms=1000, max_num_frames=360):
+        """Iterate over the video, yielding one BGR frame every ``every_ms``
+        milliseconds, skipping the first ``max_prev_frames`` sampled frames
+        and stopping after frame index ``max_num_frames``.
+
+        Yields:
+            numpy.ndarray frame with shape (height, width, channels).
+        """
+        video_capture = cv2.VideoCapture()
+        if not video_capture.open(self.video_path):
+            print('Error: Cannot open video file ' + self.video_path,
+                  file=sys.stderr)
+            return
+        try:
+            last_ts = -99999  # Timestamp (ms) of the last retrieved frame.
+            num_retrieved = 0
+            while num_retrieved <= max_num_frames:
+                # Skip frames until we are every_ms past the last kept frame.
+                while video_capture.get(CAP_PROP_POS_MSEC) < every_ms + last_ts:
+                    if not video_capture.read()[0]:
+                        return
+                last_ts = video_capture.get(CAP_PROP_POS_MSEC)
+                has_frames, frame = video_capture.read()
+                if not has_frames:
+                    break
+                if num_retrieved >= max_prev_frames:
+                    yield frame
+                num_retrieved += 1
+        finally:
+            video_capture.release()  # the original leaked the capture handle
+
+    def __quantize(self, features, min_quantized_value=-2.0, max_quantized_value=2.0):
+        """Quantize a 1-D float32 feature vector to ints in [0, 255]."""
+        assert features.dtype == 'float32'
+        assert len(features.shape) == 1  # 1-D array
+        features = numpy.clip(features, min_quantized_value, max_quantized_value)
+        quantize_range = max_quantized_value - min_quantized_value
+        features = (features - min_quantized_value) * (255.0 / quantize_range)
+        return [int(round(f)) for f in features]
+
+    def __segments(self):
+        """Yield ``(segment_index, frame_iterator)`` for every segment."""
+        total_frames = self.__calc_frames()
+        for seg_idx in range(self.__calc_segments(total_frames)):
+            start = seg_idx * self.segment_len
+            if total_frames > start:
+                yield seg_idx, self.__frame_iterator(
+                    every_ms=1000.0 / self.frame_rate,
+                    max_prev_frames=start,
+                    max_num_frames=start + self.segment_len)
+
+    def __segment_frames_and_features(self, keep_frames):
+        """Shared worker: returns ``(frames_per_segment, seg_features)``.
+
+        ``seg_features`` maps 'seg1', 'seg2', ... to the mean feature vector
+        of that segment; ``frames_per_segment`` is a list of frame arrays
+        (empty when ``keep_frames`` is False).
+        """
+        seg_features = {}
+        frames_per_segment = []
+        for seg_idx, frame_iter in self.__segments():
+            rgb_features = []
+            rgb_frames = []
+            sum_rgb_features = None
+            for rgb in frame_iter:
+                # OpenCV frames are BGR; the extractor expects RGB.
+                features = self.extractor.extract_rgb_frame_features(rgb[:, :, ::-1])
+                if keep_frames:
+                    rgb_frames.append(rgb)
+                if sum_rgb_features is None:
+                    sum_rgb_features = features
+                else:
+                    sum_rgb_features += features
+                rgb_features.append(self.__quantize(features))
+            if not rgb_features:
+                print('Could not get features for ' + self.video_path,
+                      file=sys.stderr)
+                continue
+            seg_features['seg' + str(seg_idx + 1)] = sum_rgb_features / len(rgb_features)
+            if keep_frames:
+                frames_per_segment.append(np.array(rgb_frames))
+        return frames_per_segment, seg_features
+
+    def __extract_features(self):
+        """Mean feature vector per segment, keyed 'seg1', 'seg2', ..."""
+        _, seg_features = self.__segment_frames_and_features(keep_frames=False)
+        return seg_features
+
+    def __write_h5(self, file_name, features):
+        """Write ``features`` to ``<output_dir>/<file_name>`` as HDF5."""
+        dt = h5py.special_dtype(vlen=str)
+        with h5py.File(self.output_dir + file_name, 'w') as h5f:
+            h5f.create_dataset('id', data=self.video_path, dtype=dt)
+            h5f.create_dataset('mean_rgb',
+                               data=np.array(list(features.values()), dtype=float))
+            h5f.create_dataset('seg_num', data=len(features), dtype=int)
+        print("features written")
+
+    def write_h5(self, file_name):
+        """Extract per-segment mean features and store them as HDF5."""
+        features = self.__extract_features()
+        print("*******feature shape ********", features['seg1'].shape)
+        self.__write_h5(file_name, features)
+
+    def write_h5_and_return_frames(self, file_name):
+        """Same as write_h5 but also returns the per-segment frame arrays."""
+        video_frames, features = self.segment_video_extract_features()
+        print("*******feature shape ********", features['seg1'].shape)
+        self.__write_h5(file_name, features)
+        return video_frames
+
+    def load_vid_data(self, width=240, height=240, normalize=True):
+        """All sampled frames resized to (width, height), as one ndarray.
+
+        NOTE(review): the original returned from inside the segment loop, so
+        only a single segment's frames were produced; frames are now
+        accumulated across every segment.
+        """
+        dim = (width, height)
+        all_frames = []
+        for _, frame_iter in self.__segments():
+            for rgb in frame_iter:
+                rgb = cv2.resize(rgb, dim, interpolation=cv2.INTER_AREA)
+                if normalize:
+                    rgb = rgb / 255.0  # scale pixel values into [0, 1]
+                all_frames.append(rgb)
+        return np.array(all_frames)
+
+    def load_segmentated_video(self):
+        """List with one ndarray of frames per segment."""
+        total_segs_rgb, _ = self.__segment_frames_and_features(keep_frames=True)
+        print("total_seg_rgb +++++ ", len(total_segs_rgb))
+        return total_segs_rgb
+
+    def segment_video_extract_features(self):
+        """Per-segment frames plus per-segment mean features."""
+        total_segs_rgb, seg_features = self.__segment_frames_and_features(keep_frames=True)
+        print("total_seg_rgb +++++ ", len(total_segs_rgb))
+        print("length of seg_features", len(seg_features))
+        return total_segs_rgb, seg_features
+
+    def get_frames(self):
+        """Per-segment arrays of 224x224 frames normalized into [0, 1]."""
+        dim = (224, 224)
+        total_segs_rgb = []
+        for _, frame_iter in self.__segments():
+            rgb_frames = [cv2.resize(rgb, dim, interpolation=cv2.INTER_AREA) / 255.0
+                          for rgb in frame_iter]
+            total_segs_rgb.append(np.array(rgb_frames))
+        print("total_seg_rgb +++++ ", len(total_segs_rgb))
+        return np.array(total_segs_rgb)
diff --git a/preprocessing.py b/preprocessing.py
new file mode 100644
--- /dev/null
+++ b/preprocessing.py
@@ -0,0 +1,130 @@
+import feature_extractor as ext
+import os
+import numpy as np
+import subprocess
+import multiprocessing
+from datetime import datetime
+import cv2
+import ffmpy3
+from ffmpy3 import FFmpeg
+import math
+import h5py
+
+
+class anomaly():
+    """Convenience wrappers around feature_extractor.Extractor: feature
+    extraction to HDF5, loading frame data, and writing segment clips."""
+
+    # All generated segment clips are written here for the web front-end.
+    STATIC_DIR = "/home/khawar/Documents/AutoEncoder/Samran_Code/api/static/"
+
+    def __init__(self):
+        super().__init__()
+
+    def _save_segments(self, segments, frame_rate):
+        """Write each segment as STATIC_DIR/output<i>.webm; return count."""
+        for i, seg_frames in enumerate(segments):
+            out_path = self.STATIC_DIR + "output" + str(i) + ".webm"
+            out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"vp80"),
+                                  frame_rate, (320, 240))
+            for frame in seg_frames:
+                # frame is a numpy.ndarray with shape (240, 320, 3)
+                out.write(frame)
+            out.release()
+        print(" video saved ! ")
+        return len(segments)
+
+    def extract(self, path, filename):
+        """Extract per-segment features from ``path`` into HDF5 ``filename``."""
+        e1 = ext.Extractor(path, 60, 1)
+        try:
+            e1.write_h5(filename)
+            print("features extracted")
+        except Exception as err:  # was a bare except that hid the cause
+            print("error encountered:", err)
+
+    def load_videos_from_dir(self, path, max_videos=50):
+        """Load and concatenate frame data for up to ``max_videos`` videos."""
+        chunks = []
+        for i, name in enumerate(sorted(os.listdir(path))[:max_videos]):
+            video_path = os.path.join(path, name)
+            print("\nloading video number : ", i)
+            print("path = ", video_path)
+            data = ext.Extractor(video_path, 60, 1).load_vid_data()
+            print("shape of Data : ", data.shape)
+            chunks.append(data)
+        return np.concatenate(chunks, axis=0)
+
+    def load_video(self, path, width=240, height=240, frame_rate=2,
+                   segment=32, normalize=True):
+        """Frame data for one video (the original referenced an undefined
+        ``video_path``; it now uses ``path``)."""
+        e1 = ext.Extractor(path, segment - 1, frame_rate)
+        return e1.load_vid_data(width, height, normalize)
+
+    def segment_videos(self, path, frame_rate):
+        """Split ``path`` into segments and save each as a .webm clip."""
+        data = ext.Extractor(path, 60, 1).load_segmentated_video()
+        return self._save_segments(data, frame_rate)
+
+    def testing_function(self, path):
+        """Per-segment frames and features for ``path``."""
+        return ext.Extractor(path, 60, 1).segment_video_extract_features()
+
+    def load_seg_and_classify(self, path, frame_rate=1):
+        """Segment ``path`` and save the clips (the original used undefined
+        ``frame_rate``/``dir_path`` names; both are fixed here)."""
+        data = ext.Extractor(path, 60, 1).load_segmentated_video()
+        self._save_segments(data, frame_rate)
+
+    def extract_feat_and_save_video(self, path, filename, frame_rate):
+        """Write features to HDF5 and save each segment as a clip.
+
+        NOTE(review): the original swallowed extraction errors, then crashed
+        on the undefined ``data``; errors now propagate to the caller.
+        """
+        data = ext.Extractor(path, 60, 1).write_h5_and_return_frames(filename)
+        print("features extracted")
+        return self._save_segments(data, frame_rate)
+
+    def writefeatures(self, path, filename):
+        """Write features for ``path`` to HDF5 ``filename``."""
+        e1 = ext.Extractor(path, 60, 1)
+        try:
+            e1.write_h5_and_return_frames(filename)
+            print("features extracted")
+        except Exception as err:
+            print("error encountered:", err)
+
+    def save_video(self, path, filename, frame_rate):
+        """Write features to HDF5 and save segment clips (the original used
+        an undefined ``dir_path``; clips go to STATIC_DIR)."""
+        data = ext.Extractor(path, 60, 1).write_h5_and_return_frames(filename)
+        return self._save_segments(data, frame_rate)
+
+    def get_length(self, path):
+        """Duration of the video at ``path`` in seconds, via ffprobe."""
+        result = subprocess.run(
+            ["ffprobe", "-v", "error", "-show_entries", "format=duration",
+             "-of", "default=noprint_wrappers=1:nokey=1", path],
+            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        return float(result.stdout)
+
+    def preprocess_video(self, path, sec, filename, target_dir):
+        """Split the video into ``sec``-second chunks; return chunk names.
+
+        Pre-existing chunk files are deleted first so the re-write is not
+        blocked; the actual split is delegated to ffmpeg-split.py.
+        """
+        seg = math.ceil(self.get_length(path) / sec)
+        print("number of segments : ", seg)
+        names = []
+        for i in range(seg):
+            file_name = (filename[:-4] + "-" + str(i + 1)
+                         + "-of-" + str(seg) + ".mp4")
+            stale = "/".join([target_dir, file_name])
+            if os.path.exists(stale):
+                os.remove(stale)
+            names.append(file_name)
+        subprocess.run(["python", "ffmpeg-split.py", "-f", path, "-s", str(sec)])
+        return names