John Alden - Hash.Flatten-0.01

NAME

Hash.Flatten - flatten/unflatten complex data structures to key-value form

SYNOPSIS

With JSAN
Standalone

DESCRIPTION

Converts back and forth between a nested data structure and a flat hash of delimited key-value pairs.

        data = { 'x' : 1, 'y' : { 'a' : 2, 'b' : { 'p' : 3, 'q' : 4 } } };
        flat = { 'x' : 1, 'y.a' : 2, 'y.b.p' : 3, 'y.b.q' : 4 };

This is useful for passing deep structures over protocols that only support key-value pairs (such as CGI and ActionScript LoadVars) without having to serialise them.

Note that I use the word "hash" here to refer to a JS Object containing only properties, representing an associative array. I'm conscious that this might sound a bit perl-centric, but since everything in JS (including an Array) is an Object, I wanted a term less ambiguous than "Object" and snappier than "Associative Array" or "Anonymous Object".

LIMITATIONS

The top level of the nested data structure must be a hash.
Structures with circular references are not allowed and will result in an exception being thrown.
Structures with multiple references to the same object will flatten OK, but when unflattened, the shared references will be lost: each occurrence is rebuilt as a separate copy.

METHODS

o = new Hash.Flatten(options): The options hashref can be used to override the default behaviour (see "OPTIONS").
flat = o.flatten(deep): Reduces a nested data-structure to key-value form.
deep = o.unflatten(flat): Reconstitutes the nested structure from the flattened key-value form.

OPTIONS

HashDelimiter and ArrayDelimiter: By default, hash dereferences are denoted by a dot, and array dereferences are denoted by a colon. You may change these delimiters to any strings you want, for example to avoid confusion as to which part of a flattened string is the 'key' and which is the 'delimiter' when your keys commonly contain dots or colons. You may use multicharacter strings if you prefer.
EscapeSequence: This is the character or sequence of characters that will be used to escape the hash and array delimiters. If this is set to undef, no escaping will be done. The default escape sequence is a backslash. The escaping strategy is to place the escape sequence in front of delimiter sequences; the escape sequence itself is escaped by replacing it with two instances.

COMPATIBILITY

Requires JS 1.5 or above. Tested in Firefox 1.5 and IE 6.

AUTHOR

Hash::Flatten perl module by John Alden + P Kent. Javascript version by John Alden [johna@cpan.org].

COPYRIGHT

Hash::Flatten perl module (c) BBC 2005. Javascript version (c) John Alden 2006.

This is free software; you can redistribute it and/or modify it under the GNU GPL. See the file COPYING in this distribution, or http://www.gnu.org/licenses/gpl.txt

///////////////////////////////////////////////////////////////////
//
// Hash.Flatten
// $Id: Flatten.js,v 1.1 2006/10/27 19:12:00 john Exp $
// (c) 2006 John Alden
// Based on http://search.cpan.org/dist/Hash-Flatten/ (c) BBC 2005
// See ../../doc/ for more information
//
///////////////////////////////////////////////////////////////////

//Declare "namespace" if not already declared
//"var" (rather than a bare assignment) keeps this legal in strict mode and in
//ES modules, where assigning to an undeclared name throws. At script top level
//it still creates, or reuses, the global Hash.
if(typeof(Hash) == "undefined") { var Hash = {}; }

/*
	Ctor

	opts (optional) may contain:
		HashDelimiter  - string placed between a hash key and its child key (default '.')
		ArrayDelimiter - string placed between an array key and an index (default ':')
		EscapeSequence - string used to escape delimiters found inside keys
		                 (default is a backslash). If the property is supplied but is
		                 undefined, null or empty, escaping is switched off.

	Throws a string if either delimiter contains the escape sequence.
*/
Hash.Flatten = function (opts) {
	if(typeof(opts) == "undefined" || opts === null) opts = {};
	this.HashDelimiter = String(opts.HashDelimiter || '.');
	this.ArrayDelimiter = String(opts.ArrayDelimiter || ':');

	if(opts.EscapeSequence) {
		this.EscapeSequence = String(opts.EscapeSequence);
	} else if(Object.prototype.hasOwnProperty.call(opts, "EscapeSequence")) {
		this.EscapeSequence = null; //explicitly supplied as empty: no escaping
	} else {
		this.EscapeSequence = "\\";
	}

	//Check delimiters don't contain escape seq (only meaningful when escaping is on)
	if(this.EscapeSequence) {
		if(this.HashDelimiter.indexOf(this.EscapeSequence) > -1) throw("Hash delimiter cannot contain escape sequence");
		if(this.ArrayDelimiter.indexOf(this.EscapeSequence) > -1) throw("Array delimiter cannot contain escape sequence");
	}
};

Hash.Flatten.VERSION = '0.01';

///////////////////////////////////////////////////////////////////
//Public Methods
///////////////////////////////////////////////////////////////////

/*
	Reduce a nested structure to a single-level hash of "delimited key" : value.
	Only own enumerable properties are visited. Values that are null or not of
	type "object" are copied as-is. Empty hashes/arrays have no representation
	in the flat form and are therefore omitted.
	Throws the string "circular reference detected" if the structure contains a cycle.
*/
Hash.Flatten.prototype.flatten = function(obj) {
	//Flatten to an array of key, value, key, value...
	//null (not "") marks the top level so that a genuine empty-string key keeps its delimiter
	var flatlist = this._flatten(null, obj, []);

	//Create a hash from this
	var rv = {};
	for(var k=0; k<flatlist.length; k+=2) {
		rv[flatlist[k]] = flatlist[k+1];
	}
	return rv;
};

/*
	Rebuild the nested structure from its flattened form.
	Each flat key is split on unescaped delimiters; a hash delimiter after a
	key creates a hash at that key, an array delimiter creates an array.
	Only own enumerable properties of hashref are read.
	Throws a string if any key segment is "__proto__", since assigning through
	it would modify a prototype instead of creating data.
*/
Hash.Flatten.prototype.unflatten = function(hashref) {
	var rv = {};
	var has_own = Object.prototype.hasOwnProperty;
	var check_key = function(key) {
		if(key == "__proto__") throw("unsafe key '__proto__' in flattened data");
		return key;
	};

	var regex = new RegExp("(?:"+Hash.Flatten.quotemeta(this.ArrayDelimiter)+"|"+Hash.Flatten.quotemeta(this.HashDelimiter)+")");

	for(var k in hashref) {
		if(!has_own.call(hashref, k)) continue;
		var v = hashref[k];

		//Alternating list: key, delimiter, key, delimiter, ..., key
		var levels = Hash.Flatten.escaped_split(k, regex, this.EscapeSequence);

		//Start the pointer at the top of the data structure
		var ptr = rv;

		//Pop off deepest level to make levels an even number
		var finalkey = check_key(this._unescape(levels.pop()));

		//Work through the levels 2 at a time: (key, type) pairs
		while(levels.length >= 2) {
			var key = check_key(this._unescape(levels.shift()));
			var datatype = levels.shift();
			//Own-property test so inherited members (e.g. "constructor") are never descended into
			if(!has_own.call(ptr, key) || typeof(ptr[key]) == "undefined") {
				ptr[key] = (datatype == this.HashDelimiter)? {} : [];
			}
			ptr = ptr[key]; //Slide the pointer down the data structure
		}

		//Finally use that popped off value to make the last ref that actually points at the value
		ptr[finalkey] = v;
	}
	return rv;
};

///////////////////////////////////////////////////////////////////
//Private Methods
///////////////////////////////////////////////////////////////////

/*
	Flatten one value.
	flatkey   - the already-escaped flat key for this value (null/undefined at top level)
	val       - the value to flatten
	refs_seen - stack of the containers currently being descended (cycle detection)
	Returns a flat array [key, value, key, value, ...].
*/
Hash.Flatten.prototype._flatten = function(flatkey, val, refs_seen) {
	//typeof null is "object", so test for it explicitly or null values would vanish
	if(val === null || typeof(val) != "object") {
		var is_top = (flatkey === null || typeof(flatkey) == "undefined");
		return [is_top? "" : flatkey, val]; //Simple types
	}

	//Check for circular refs
	for(var i=refs_seen.length-1; i>=0; i--) {
		if(refs_seen[i] === val) throw("circular reference detected");
	}

	refs_seen.push(val); //Add to stack before going down to next level
	var delim = (val instanceof Array)? this.ArrayDelimiter : this.HashDelimiter;
	var rv = this._flatten_obj(val, refs_seen, flatkey, delim); //Descend
	refs_seen.pop(); //Remove from stack when returning from next level
	return rv;
};

/*
	Flatten every own enumerable property of a hash or array.
	prefix - flat key of obj itself (null/undefined when obj is the top level)
	delim  - delimiter to put between prefix and each (escaped) property name
	Returns a flat array [key, value, key, value, ...].
*/
Hash.Flatten.prototype._flatten_obj = function(obj, refs_seen, prefix, delim) {
	var rv = [];
	var is_top = (prefix === null || typeof(prefix) == "undefined");
	for(var k in obj) {
		//Skip inherited members, e.g. methods added to Array.prototype by other libraries
		if(!Object.prototype.hasOwnProperty.call(obj, k)) continue;
		var flatkey = this._escape(k);
		if(!is_top) flatkey = prefix+delim+flatkey; //prepend prefix (if present) to key
		var flat_arr = this._flatten(flatkey, obj[k], refs_seen);
		for(var i=0; i<flat_arr.length; i++) rv.push(flat_arr[i]);
	}
	return rv;
};

/*
	Escape a single key: double every escape sequence, then put an escape
	sequence in front of every hash and array delimiter.
	Returns val untouched when escaping is switched off.
*/
Hash.Flatten.prototype._escape = function(val) {
	var es = this.EscapeSequence; //shorthand name
	if(!es) return val; //no-op

	var string = String(val); //might not be a string
	var delims = [es, this.HashDelimiter, this.ArrayDelimiter]; //put escape seq in front of any of these
	//A replacer function is used so that "$" in the escape sequence is taken literally
	var add_escape = function(match) { return es + match; };
	for(var idx=0; idx<delims.length; idx++) {
		string = string.replace(new RegExp(Hash.Flatten.quotemeta(delims[idx]),'g'), add_escape);
	}
	return string;
};

/*
	Reverse _escape on a single key segment: a doubled escape sequence becomes
	one literal escape sequence, a lone escape sequence is removed.
	Returns val untouched when escaping is switched off.
*/
Hash.Flatten.prototype._unescape = function(val) {
	var es = this.EscapeSequence; //shorthand name
	if(!es) return val; //no-op

	var string = String(val); //might not be a string
	var qes = Hash.Flatten.quotemeta(es); //shorthand name

	//Single left-to-right pass; replacer function keeps "$" in the escape sequence literal
	return string.replace(new RegExp(qes+"("+qes+")?",'g'), function(match, doubled) {
		return doubled? es : "";
	});
};

///////////////////////////////////////////////////////////////////
//Utility Functions - currently parked on class
///////////////////////////////////////////////////////////////////

/*
	Emulate a capturing regex split with a negative lookbehind
	In perl this looks something like:
		@array = split($string, /(?<!$escape_sequence)($regex)/);

	Differences from a plain lookbehind:
	 - a delimiter counts as escaped only when preceded by an ODD number of
	   escape sequences, so a literal (doubled) escape before a real delimiter
	   still splits
	 - the final segment is always returned, even when empty, so the result
	   always has the form [segment, delimiter, segment, ..., segment]
	A falsy esc_ptn means no delimiter is ever treated as escaped.
*/
Hash.Flatten.escaped_split = function(string, split_rx, esc_ptn) {
	var result = [];
	var remainder = String(string);
	var buf = "";

	while(remainder.length > 0) {
		var rv = remainder.match(split_rx);
		//Stop on no match; also on an empty match at the start, which could never make progress
		if(!rv || rv.index + rv[0].length == 0) break;

		var idx = rv.index;
		var chunk = remainder.slice(0,idx); //String before delimiter
		var sep = rv[0]; //Matched delimiter

		//Is delimiter escaped? Count the escape sequences immediately before it.
		var is_escaped = false;
		if(esc_ptn) {
			var pos = chunk.length;
			while(pos >= esc_ptn.length && chunk.substring(pos - esc_ptn.length, pos) == esc_ptn) {
				is_escaped = !is_escaped;
				pos -= esc_ptn.length;
			}
		}

		//Accumulate until an unescaped delimiter is found
		buf += chunk;
		if(is_escaped) {
			buf += sep;
		} else {
			result.push(buf,sep);
			buf = "";
		}

		//chop off chunk + sep, and go round again
		remainder = remainder.slice(idx+sep.length);
	}

	//Last bit (possibly empty)
	result.push(buf + remainder);
	return result;
};

/*
	Crudely emulate perl's quotemeta function:
	put a backslash in front of every non-word character.
*/
Hash.Flatten.quotemeta = function(string) {
	return String(string).replace(/(\W)/g,"\\$1");
};