Sunday, June 28, 2015

Implement Garbage Collector



Baby's First Garbage Collector – journal.stuffwithstuff.com




  • Any object that’s being referenced by a variable that’s still in scope is in use.
  • Any object that’s referenced by another object that’s in use is in use.


    1. Starting at the roots, traverse the entire object graph. Every time you reach an object, set a “mark” bit on it to true.
    2. Once that’s done, find all of the objects whose mark bits are not set and delete them.
    https://github.com/munificent/mark-sweep/blob/master/main.c
    typedef enum {
      OBJ_INT,
      OBJ_PAIR
    } ObjectType;

    typedef struct sObject {
      ObjectType type;
      unsigned char marked;

      /* The next object in the linked list of heap allocated objects. */
      struct sObject* next;

      union {
        /* OBJ_INT */
        int value;

        /* OBJ_PAIR */
        struct {
          struct sObject* head;
          struct sObject* tail;
        };
      };
    } Object;

    typedef struct {
      Object* stack[STACK_MAX];
      int stackSize;

      /* The first object in the linked list of all objects on the heap. */
      Object* firstObject;

      /* The total number of currently allocated objects. */
      int numObjects;

      /* The number of objects required to trigger a GC. */
      int maxObjects;
    } VM;

    void assert(int condition, const char* message) {
      if (!condition) {
        printf("%s\n", message);
        exit(1);
      }
    }

    VM* newVM() {
      VM* vm = malloc(sizeof(VM));
      vm->stackSize = 0;
      vm->firstObject = NULL;
      vm->numObjects = 0;
      vm->maxObjects = 8;
      return vm;
    }


    void push(VM* vm, Object* value) {
      assert(vm->stackSize < STACK_MAX, "Stack overflow!");
      vm->stack[vm->stackSize++] = value;
    }


    Object* pop(VM* vm) {
      assert(vm->stackSize > 0, "Stack underflow!");
      return vm->stack[--vm->stackSize];
    }

    void mark(Object* object) {
      /* If already marked, we're done. Check this first to avoid recursing
         on cycles in the object graph. */
      if (object->marked) return;

      object->marked = 1;

      if (object->type == OBJ_PAIR) {
        mark(object->head);
        mark(object->tail);
      }
    }

    void markAll(VM* vm)
    {
      for (int i = 0; i < vm->stackSize; i++) {
        mark(vm->stack[i]);
      }
    }

    void sweep(VM* vm)
    {
      Object** object = &vm->firstObject;
      while (*object) {
        if (!(*object)->marked) {
          /* This object wasn't reached, so remove it from the list and free it. */
          Object* unreached = *object;

          *object = unreached->next;
          free(unreached);

          vm->numObjects--;
        } else {
          /* This object was reached, so unmark it (for the next GC) and move on to
           the next. */
          (*object)->marked = 0;
          object = &(*object)->next;
        }
      }
    }

    void gc(VM* vm) {
      int numObjects = vm->numObjects;

      markAll(vm);
      sweep(vm);

      vm->maxObjects = vm->numObjects * 2;

      printf("Collected %d objects, %d remaining.\n", numObjects - vm->numObjects,
             vm->numObjects);
    }

    Object* newObject(VM* vm, ObjectType type) {
      if (vm->numObjects == vm->maxObjects) gc(vm);

      Object* object = malloc(sizeof(Object));
      object->type = type;
      object->next = vm->firstObject;
      vm->firstObject = object;
      object->marked = 0;

      vm->numObjects++;

      return object;
    }

    void pushInt(VM* vm, int intValue) {
      Object* object = newObject(vm, OBJ_INT);
      object->value = intValue;

      push(vm, object);
    }

    Object* pushPair(VM* vm) {
      Object* object = newObject(vm, OBJ_PAIR);
      object->tail = pop(vm);
      object->head = pop(vm);

      push(vm, object);
      return object;
    }

    void objectPrint(Object* object) {
      switch (object->type) {
        case OBJ_INT:
          printf("%d", object->value);
          break;

        case OBJ_PAIR:
          printf("(");
          objectPrint(object->head);
          printf(", ");
          objectPrint(object->tail);
          printf(")");
          break;
      }
    }

    void freeVM(VM *vm) {
      vm->stackSize = 0;
      gc(vm);
      free(vm);
    }
    https://tianrunhe.wordpress.com/2012/03/24/data-structure-and-algorithms-for-garbage-collection-in-c/
    Read full article from Baby's First Garbage Collector – journal.stuffwithstuff.com

    Labels

    Review (572) System Design (334) System Design - Review (198) Java (189) Coding (75) Interview-System Design (65) Interview (63) Book Notes (59) Coding - Review (59) to-do (45) Linux (43) Knowledge (39) Interview-Java (35) Knowledge - Review (32) Database (31) Design Patterns (31) Big Data (29) Product Architecture (28) MultiThread (27) Soft Skills (27) Concurrency (26) Cracking Code Interview (26) Miscs (25) Distributed (24) OOD Design (24) Google (23) Career (22) Interview - Review (21) Java - Code (21) Operating System (21) Interview Q&A (20) System Design - Practice (20) Tips (19) Algorithm (17) Company - Facebook (17) Security (17) How to Ace Interview (16) Brain Teaser (14) Linux - Shell (14) Redis (14) Testing (14) Tools (14) Code Quality (13) Search (13) Spark (13) Spring (13) Company - LinkedIn (12) How to (12) Interview-Database (12) Interview-Operating System (12) Solr (12) Architecture Principles (11) Resource (10) Amazon (9) Cache (9) Git (9) Interview - MultiThread (9) Scalability (9) Trouble Shooting (9) Web Dev (9) Architecture Model (8) Better Programmer (8) Cassandra (8) Company - Uber (8) Java67 (8) Math (8) OO Design principles (8) SOLID (8) Design (7) Interview Corner (7) JVM (7) Java Basics (7) Kafka (7) Mac (7) Machine Learning (7) NoSQL (7) C++ (6) Chrome (6) File System (6) Highscalability (6) How to Better (6) Network (6) Restful (6) CareerCup (5) Code Review (5) Hash (5) How to Interview (5) JDK Source Code (5) JavaScript (5) Leetcode (5) Must Known (5) Python (5)

    Popular Posts