*! v6.0 19Oct01
/*
This program written by Jonah B. Gelbach to economize on dofile text.
It handles variations on standard sort-and-merge routines.

Syntax is:

    jmerge <varlist> using <usingdata> [ , [[no]autosort] [[no]tempfile]
           [keepif(string)] [nodroppre] [nodroppost]
           [keeping(varlist)] [dropping(varlist)] [REName(string)] ]

where

<varlist> is a list of the variables on which the merge happens.  Note
    that if <varlist> is empty, then jmerge will assume you did not want
    to merge on any variables (the stata default would normally cause an
    empty varlist slot to contain all vars in the dataset).

<usingdata> is the name of the dataset used to merge against the current
    one.

Default values of autosort and tempfile are empty, which means those
options are turned on.  To turn off, use "noauto" and/or "notempfile".

autosort turned on means that the using data will be sorted for the user
    automatically.

tempfile turned on means that a tempfile will be created and used to
    store the sorted using data, rather than "save, replace"ing the
    actual using dataset.  This is useful if, for example, the using data
    shows up as a prerequisite in a Makefile.  The only disadvantage of
    this approach is that there may not be enough space in the /tmp
    filesystem to store the data, in which case we'll bomb.

keepif: the default action is to issue the command

        keep if _merge == 3

    Any string provided as an argument will instead induce the command

        keep if `keepif'

    where the local macro `keepif' contains the verbatim argument
    supplied in the keepif() option.  Note that you can keep all
    observations by using "keepif(1)".

nodroppost tells stata not to drop the _merge variable after merging.

nodroppre tells -jmerge- not to drop an existing _merge variable (this
    means that your merge can't happen).  It basically allows you to
    avoid accidentally dropping a _merge that you meant to save
    somewhere.

keeping contains a list of variables in the using data to keep before
    the merge.  Note that if you use keeping, -jmerge- will automatically
    override any notemp specification in order to avoid overwriting your
    usingdata.

dropping contains a list of variables in the using data to drop before
    the merge.  Note that if you use dropping, -jmerge- will
    automatically override any notemp specification in order to avoid
    overwriting your usingdata.

rename contains a list of pairs, such that the first token in each pair
    is the name of an existing variable in the using data and the second
    token is what we want to rename it to.  This is useful when you
    don't want to rename vars in the master data before and after the
    merge.  Note that the pairs should be comma-delimited, i.e.
    "pair1, pair2", or "old1 new1 , old2 new2".

Return codes set by -jmerge- itself (other codes come straight from the
underlying Stata commands):

    198  both keeping() and dropping() were specified
    110  _merge already exists and nodroppre was specified
      5  using data is not sorted and noautosort was specified
*/

program define jmerge
    version 6.0

    local varlist "optional existing none"
    local using "required existing noprefix"
    local options " noAUTOsort KEEPIF(string) noTEMPfile noDroppre noDroppost KEEPING(string) DROPPING(string) REName(string)"
    parse "`*'"

    /* S_rc is the global flag through which -autosort- reports that it
       refused to sort.  Clear it up front so that a value left over from
       an earlier call cannot be mistaken for a failure of this one. */
    global S_rc = 0

    /* if the user did not specify a renaming string, use a sentinel so
       that -autosort- and -droppre- know there is nothing to rename */
    if "`rename'" == "" {
        local rename "_RENMISS"
    }

    if "`keeping'" ~= "" & "`dropping'" ~= "" {
        /* user asked for both keeping and dropping lists: refuse rather
           than carry on with an ambiguous request */
        di _new "-> Please use at most one of the KEEPING or DROPPING options." _new
        exit 198
    }
    else if "`keeping'" ~= "" {
        /* re-wrap so the macro can be passed on verbatim as an option */
        local keeping "keeping(`keeping')"
    }
    else if "`dropping'" ~= "" {
        /* re-wrap so the macro can be passed on verbatim as an option */
        local dropping "dropping(`dropping')"
    }

    /* the merge variables double as the sort key */
    local sort "`varlist'"

    /* now doing the guts of the program */
    di
    if "`varlist'" ~= "" {
        di "-> sort `sort'"
        sort `sort'
    }

    di
    if "`keeping'" == "" & "`dropping'" == "" & "`rename'" == "_RENMISS" {
        /* user did not include KEEPING, DROPPING, or RENAME: try the
           merge directly and only fall back to the helpers on failure */
        di "-> cap noisily merge `sort' using `using'"
        cap noisily merge `sort' using `using'

        if _rc == 5 {
            /* using data not sorted */
            di _new "Using data not sorted." _new
            di " -> autosort , sort(`sort') using(`using') rename(`rename') `tempfile' `autosort' `keeping' `dropping'"
            autosort , sort(`sort') using(`using') rename(`rename') `tempfile' `autosort' `keeping' `dropping'
            if "$S_rc" == "9" {
                /* -autosort- refused (noautosort): report the original
                   "not sorted" failure to the caller */
                exit 5
            }
        }
        else if _rc == 110 {
            /* note that _rc 110 has precedence over _rc 5 */
            /* probably because the _merge variable already exists */
            cap confirm variable _merge
            if _rc ~= 0 {
                /* _merge does *not* exist, so pass on the merge error */
                exit 110
            }

            if "`droppre'" ~= "nodroppre" {
                /* user did not ask us to protect an existing _merge */
                droppre , sort(`sort') using(`using') rename(`rename') `tempfile' `autosort' `keeping' `dropping'
                if "$S_rc" == "9" {
                    exit 5
                }
            }
            else {
                /* user didn't want to drop _merge */
                di _new "_merge already defined: you can't do a new merge until you drop it."
                di _new "You should either drop _merge manually or re-call -jmerge- "
                di "without the nodroppre option"
                exit 110
            }
        }
        else if _rc ~= 0 {
            /* there must have been some other error */
            exit _rc
        }
    }
    else {
        /* user did include KEEPING, DROPPING, or RENAME.  A plain
           "cap noisily merge" as above could not drop/keep/rename the
           relevant vars in the using data, so we go through -droppre-,
           defined below, which handles the call to -autosort-, also
           defined below */
        if "`droppre'" ~= "nodroppre" {
            di " -> droppre, sort(`sort') using(`using') rename(`rename') `tempfile' `autosort' `keeping' `dropping'"
            droppre, sort(`sort') using(`using') rename(`rename') `tempfile' `autosort' `keeping' `dropping'
            if "$S_rc" == "9" {
                exit 5
            }
        }
        else {
            /* user didn't want to drop _merge before merging */
            cap confirm variable _merge
            if _rc ~= 0 {
                /* _merge does *not* exist, so it is safe to go ahead */
                di " -> autosort , sort(`sort') using(`using') rename(`rename') `tempfile' `autosort' `keeping' `dropping'"
                autosort , sort(`sort') using(`using') rename(`rename') `tempfile' `autosort' `keeping' `dropping'
                if "$S_rc" == "9" {
                    exit 5
                }
            }
            else {
                /* _merge *does* exist */
                di _new "_merge already defined: you can't do a new merge until you drop it."
                di _new "You should either drop _merge manually or re-call -jmerge- "
                di "without the nodroppre option"
                exit 110
            }
        }
    }

    di "-> tab _merge"
    tab _merge

    di
    if "`keepif'" == "" {
        /* default is to keep only those with _merge==3 */
        di "-> keep if _merge == 3"
        keep if _merge == 3
    }
    else {
        /* user supplied an explicit keep condition */
        di " -> keep if `keepif'"
        keep if `keepif'
    }

    di
    if "`droppost'" ~= "nodroppost" {
        /* user did not request to keep the _merge variable */
        di "-> drop _merge"
        drop _merge
    }
    /* all done */
end


/* This program handles all autosort-related stuff: it sorts the using
   data (into a tempfile unless notempfile applies), optionally
   keeps/drops/renames using-data variables, and then performs the merge.
   It sets the global S_rc to 0 on success and to 9 when the user
   specified noautosort, in which case no merge is done. */
program define autosort
    version 6.0

    local options "SORT(string) noAUTOsort USING(string) noTEMPfile KEEPING(string) DROPPING(string) RENAME(string)"
    parse "`*'"

    if "`autosort'" ~= "noautosort" {
        /* user wants using data to be sorted automatically if necessary */
        di "-> -autosort- will now sort your using dataset..."
        di "-> preserve"
        preserve
        di "-> use `using' , clear"
        use `using' , clear

        if "`sort'" ~= "" {
            /* this check should be extraneous, since _rc==5 happens only
               with sorts */
            di "-> sort `sort'"
            sort `sort'
        }

        if "`tempfile'" == "notempfile" & "`keeping'" == "" & "`dropping'" == "" & "`rename'" == "_RENMISS" {
            /* user asked for no tempfile, and nothing else forces one:
               overwrite the using data with its sorted version */
            di "-> save `using', replace"
            save `using', replace
        }
        else {
            /* a tempfile is used in lieu of saving the using data, either
               by default or because keeping/dropping/rename would
               otherwise alter the user's file */
            di "-> tempfile tusing"
            tempfile tusing
            di _new "-> You requested a temporary file to be used. It is called" _new _new "-> `tusing'" _new
            di _new "-> Continuing..." _new

            if "`keeping'" ~= "" {
                /* keep only the requested variables plus the merge keys;
                   -unabbrev- leaves the expanded list in $S_1 */
                unabbrev `keeping'
                di "-> keep `sort' $S_1"
                keep `sort' $S_1
            }
            else if "`dropping'" ~= "" {
                /* note to self: should add a validation that vars in
                   `sort' aren't dropped */
                unabbrev `dropping'
                di "-> drop $S_1"
                drop $S_1
            }

            if "`rename'" ~= "_RENMISS" {
                /* NOTE(review): -newname- is not defined in this file;
                   presumably a companion program that applies the
                   comma-delimited "old new" pairs -- confirm it is
                   installed */
                newname `rename'
            }

            di "-> save `tusing', replace"
            save `tusing', replace
            di
            di "-> local using `tusing'"
            local using "`tusing'"
        }

        di
        di "-> restore"
        restore
        di "-> merge `sort' using `using'"
        merge `sort' using `using'
        di
        global S_rc = 0   /* need to do this explicitly since this is a global */
    }
    else {
        /* user didn't want autosort, and the using data is not sorted */
        di "You must either manually sort the using data or"
        di "re-call -jmerge- without the noautosort option (and maybe "
        di "without the notempfile option)" _new
        global S_rc = 9
        exit
    }
end


/* This program drops any pre-existing _merge variable and then performs
   the merge, either directly (falling back to -autosort- when the using
   data is unsorted) or always through -autosort- when KEEPING, DROPPING,
   or RENAME was specified.  A refusal by -autosort- is signalled to the
   caller through the global S_rc (value 9). */
program define droppre
    version 6.0

    local options "SORT(string) noAUTOsort USING(string) noTEMPfile KEEPING(string) DROPPING(string) RENAME(string)"
    parse "`*'"

    cap confirm variable _merge
    if _rc == 0 {
        /* _merge exists, which is bad for business */
        di _new "-> _merge already defined, but being dropped automatically." _new
        di "-> drop _merge"
        drop _merge
    }

    di
    if "`keeping'" == "" & "`dropping'" == "" & "`rename'" == "_RENMISS" {
        /* user did not include KEEPING, DROPPING, or RENAME */
        di "-> cap noisily merge `sort' using `using'"
        cap noisily merge `sort' using `using'

        if _rc == 5 {
            /* using data not sorted */
            di _new "Using data not sorted." _new
            di " -> autosort , sort(`sort') using(`using') rename(`rename') `tempfile' `autosort' `keeping' `dropping'"
            autosort , sort(`sort') using(`using') rename(`rename') `tempfile' `autosort' `keeping' `dropping'
            if "$S_rc" == "9" {
                exit
            }
        }
        else if _rc ~= 0 {
            /* any other merge failure must reach the caller instead of
               being swallowed by the capture */
            exit _rc
        }
    }
    else {
        /* user did include KEEPING, DROPPING, or RENAME, so we'll use
           autosort; re-wrap the lists so they can be passed as options */
        if "`keeping'" ~= "" {
            local keeping "keeping(`keeping')"
        }
        else if "`dropping'" ~= "" {
            local dropping "dropping(`dropping')"
        }

        di " -> autosort , sort(`sort') using(`using') rename(`rename') `tempfile' `autosort' `keeping' `dropping'"
        autosort , sort(`sort') using(`using') rename(`rename') `tempfile' `autosort' `keeping' `dropping'
        if "$S_rc" == "9" {
            exit
        }
    }

    di
end