/**
* A component of a library for
* MoSeS.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
package uk.ac.leeds.ccg.andyt.projects.moses.io;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.io.Serializable;
import java.io.StreamTokenizer;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
import java.util.Vector;
import uk.ac.leeds.ccg.andyt.projects.moses.misc.AgeConverter;
import uk.ac.leeds.ccg.andyt.projects.moses.utilities.StaticIO;
/**
* For accessing HSARDataRecords and information about them.
*/
public class HSARDataHandler extends AbstractDataHandler {
/**
* For storing all HSARDataRecords
*/
protected HSARDataRecord[] _HSARDataRecordArray;
/**
* A collection providing a convenient way to access data for specific
* AgeSex classes for HRPs. Keys are AgeSex. Values are Vectors of
* HSARDataRecords.
*/
protected HashMap _AgeSexHRP_HSARDataRecordVector_HashMap;
/**
* A collection providing a convenient way to access all other
* HSARDataRecords for a household with a given Household ID. Keys are
* integer HID. Values are Vectors (of HSARDataRecords) with the first being
* the Household Reference Person.
*/
protected HashMap _HID_HSARDataRecordVector_HashMap;
/** Creates a new instance of HSARDataHandler */
public HSARDataHandler() {
}
/**
* Creates a new instance of HSARDataHandler from aFile.
* @param aFile
*/
public HSARDataHandler(
File aFile) {
_Directory = aFile.getParentFile();
init(_Directory);
if (aFile.getName().endsWith(".dat")) {
init(aFile.getParentFile());
load(aFile);
this._RecordLength = new ISARDataRecord().getSizeInBytes();
loadIntoCache();
File thisFile = new File(
_Directory,
this.getClass().getCanonicalName() + ".thisFile");
StaticIO.writeObject(
this,
thisFile);
} else {
Object object = StaticIO.readObject(aFile);
HSARDataHandler aHSARDataHandler = (HSARDataHandler) object;
load(aFile);
this._RecordLength = aHSARDataHandler._RecordLength;
//this._RecordLength = new ISARDataRecord().getSizeInBytes();
this._HSARDataRecordArray = aHSARDataHandler._HSARDataRecordArray;
this._AgeSexHRP_HSARDataRecordVector_HashMap = aHSARDataHandler._AgeSexHRP_HSARDataRecordVector_HashMap;
this._HID_HSARDataRecordVector_HashMap = aHSARDataHandler._HID_HSARDataRecordVector_HashMap;
}
}
/**
* Loads from source file
*/
public void formatSource(
File sourceFile,
File formattedFile)
throws IOException {
_Logger.entering(this.getClass().getCanonicalName(), "formatSource(File,File)");
_File = formattedFile;
// _File = new File (
// _Directory,
// HSARDataRecords.dat);
if (!_File.exists()) {
this._File.createNewFile();
}
this._RandomAccessFile = new RandomAccessFile(this._File, "rw");
//File sourceFile = new File(
// "C:/Work/data/Census/2001/SAR/household/5278TAB/UKDA-5278-tab/tab/lichhd-051019.tab");
BufferedReader aBufferedReader =
new BufferedReader(
new InputStreamReader(
new FileInputStream(sourceFile)));
StreamTokenizer aStreamTokenizer =
new StreamTokenizer(aBufferedReader);
StaticIO.setStreamTokenizerSyntax2(aStreamTokenizer);
String line;
long RecordID = 0L;
HSARDataRecord aHSARDataRecord = new HSARDataRecord();
// Skip the first line
int tokenType = aStreamTokenizer.nextToken();
while (tokenType != StreamTokenizer.TT_EOL) {
tokenType = aStreamTokenizer.nextToken();
}
tokenType = aStreamTokenizer.nextToken();
boolean parsed = false;
while (tokenType != StreamTokenizer.TT_EOF) {
switch (tokenType) {
case StreamTokenizer.TT_EOL:
if (RecordID % 10000 == 0) {
log(aHSARDataRecord.toString());
}
// Write out householdSARRecord
if (parsed) {
aHSARDataRecord.write(this._RandomAccessFile);
// log( "this.tRandomAccessFile.length() " +
// this.tRandomAccessFile.length() );
RecordID++;
}
break;
case StreamTokenizer.TT_WORD:
line = aStreamTokenizer.sval;
parsed = aHSARDataRecord.parse(
RecordID,
line);
break;
}
tokenType = aStreamTokenizer.nextToken();
}
log("Number of HSARDataRecords loaded " + (RecordID + 1L));
this._RandomAccessFile.close();
this._RandomAccessFile = new RandomAccessFile(this._File, "r");
_Logger.exiting(this.getClass().getCanonicalName(), "formatSource(File,File)");
}
/**
* Loads this.tAGEHHRPVectors and this.tHouseholdVectors
*/
private void initVectors() {
this._HID_HSARDataRecordVector_HashMap = new HashMap();
this._AgeSexHRP_HSARDataRecordVector_HashMap = new HashMap();
AgeSex aAgeSex;
HSARDataRecord aHSARDataRecord;
long nDataRecords = getNDataRecords();
boolean HRP;
int HHID;
for (long RecordID = 0L; RecordID < nDataRecords; RecordID++) {
aHSARDataRecord = getHSARDataRecord(RecordID);
HRP = aHSARDataRecord.get_HRP();
if (HRP) {
aAgeSex = new AgeSex(
aHSARDataRecord.get_AGEH(),
aHSARDataRecord.get_SEX());
if (this._AgeSexHRP_HSARDataRecordVector_HashMap.containsKey(aAgeSex)) {
((Vector) this._AgeSexHRP_HSARDataRecordVector_HashMap.get(aAgeSex)).add(aHSARDataRecord);
} else {
Vector aHSARDataRecordsVector = new Vector();
aHSARDataRecordsVector.add(aHSARDataRecord);
this._AgeSexHRP_HSARDataRecordVector_HashMap.put(
aAgeSex,
aHSARDataRecordsVector);
}
}
HHID = aHSARDataRecord.get_HHID();
if (this._HID_HSARDataRecordVector_HashMap.containsKey(HHID)) {
// Add HRP at the start
if (HRP) {
((Vector) this._HID_HSARDataRecordVector_HashMap.get(HHID)).add(
0,
aHSARDataRecord);
} else {
((Vector) this._HID_HSARDataRecordVector_HashMap.get(HHID)).add(
aHSARDataRecord);
}
} else {
Vector aHSARDataRecordsVector = new Vector();
aHSARDataRecordsVector.add(aHSARDataRecord);
this._HID_HSARDataRecordVector_HashMap.put(
HHID,
aHSARDataRecordsVector);
}
}
//log("this.tHouseholdVectors.size() " + this._HID_HSARDataRecordsVector_HashMap.size());
}
/**
* Randomly prints out 20 HSAR records
*/
private void printHSARDataRecords(
Random aRandom,
int numberOfRecords) {
HSARDataRecord aHSARDataRecord = new HSARDataRecord();
int ID = Integer.MIN_VALUE;
int Age;
short aAgeClassHSARDataRecord;
boolean Sex;
AgeSex aAgeSex;
for (int i = 0; i < numberOfRecords; i++) {
Age = (short) aRandom.nextInt(100);
aAgeClassHSARDataRecord = AgeConverter.getAgeClassHSARDataRecord(Age);
Sex = aRandom.nextBoolean();
aAgeSex = new AgeSex(
aAgeClassHSARDataRecord,
Sex);
aHSARDataRecord = getHSARDataRecord(
aRandom,
aAgeSex);
if (aHSARDataRecord != null) {
log(aHSARDataRecord.toString());
}
}
}
/**
* Loads HSARDataRecords into the cache.
*/
public void loadIntoCache() {
_Logger.entering(this.getClass().getCanonicalName(), "loadIntoCache()");
long nDataRecords = super.getNDataRecords();
if (nDataRecords > Integer.MAX_VALUE) {
log("nDataRecords>Integer.MAX_VALUE");
System.exit(4);
}
this._HSARDataRecordArray = new HSARDataRecord[(int) nDataRecords];
try {
this._RandomAccessFile.seek(0);
} catch (IOException ioe0) {
ioe0.printStackTrace();
}
for (int _HSARRecordID = 0; _HSARRecordID < nDataRecords; _HSARRecordID++) {
this._HSARDataRecordArray[_HSARRecordID] = new HSARDataRecord(
this._RandomAccessFile);
if (_HSARRecordID % 10000 == 0) {
log("loadIntoCache " + _HSARRecordID);
}
}
initVectors();
_Logger.exiting(this.getClass().getCanonicalName(), "loadIntoCache()");
}
/**
* @param aRecordID
* The RecordID of the HSARDataRecord to be returned.
*/
public AbstractDataRecord getDataRecord(long aRecordID) {
return getHSARDataRecord(aRecordID);
}
/**
* @param aRecordID
* The RecordID of the HSARDataRecord to be returned.
*/
public HSARDataRecord getHSARDataRecord(long aRecordID) {
try {
return this._HSARDataRecordArray[(int) aRecordID];
} catch (NullPointerException aNullPointerException) {
try {
this._RandomAccessFile.seek(_RecordLength * aRecordID);
return new HSARDataRecord(this._RandomAccessFile);
} catch (IOException ioe0) {
ioe0.printStackTrace();
return null;
}
}
}
/**
* @return A pseudo random HRP HSARDataRecord.
* @param aRandom
* The Random used to select.
*/
public HSARDataRecord getHSARDataRecord(
Random aRandom) {
int age;
short aAgeClassHSARDataRecord;
HSARDataRecord aHSARDataRecord;
boolean sex;
AgeSex aAgeSex;
do {
age = aRandom.nextInt(100);
aAgeClassHSARDataRecord = AgeConverter.getAgeClassHSARDataRecord(age);
sex = aRandom.nextBoolean();
aAgeSex = new AgeSex(
aAgeClassHSARDataRecord,
sex);
aHSARDataRecord = getHSARDataRecord(aRandom, aAgeSex);
} while (aHSARDataRecord == null);
return aHSARDataRecord;
}
/**
* @return A Vector of HSARDataRecords for the Household which contains
* aHRPHSARDataRecord.
* @param aHRPHSARDataRecord
*/
public Vector getHSARDataRecordsVector(
HSARDataRecord aHRPHSARDataRecord) {
if (this._HID_HSARDataRecordVector_HashMap.containsKey(aHRPHSARDataRecord.get_HHID())) {
return (Vector) this._HID_HSARDataRecordVector_HashMap.get(aHRPHSARDataRecord.get_HHID());
} else {
log("HSARDataRecord" + aHRPHSARDataRecord.toString() + "is not an HRP!");
return (Vector) null;
}
}
/**
* @return A Vector of Vectors of HSARDataRecords for the Households with
* HRP as in aHRPHSARDataRecordsHashSet
* @param aHRPHSARDataRecordsHashSet
* A HashSet of HSARDataRecords for HRP for the HSARDataRecords
* returned in the Vector of Vectors
*/
public Vector getHSARDataRecordsVectors(
HashSet aHRPHSARDataRecordsHashSet) {
Vector result = new Vector(aHRPHSARDataRecordsHashSet.size());
Iterator aHRPHSARDataRecordsHashSetIterator = aHRPHSARDataRecordsHashSet.iterator();
Vector aHousehold;
while (aHRPHSARDataRecordsHashSetIterator.hasNext()) {
aHousehold = getHSARDataRecordsVector((HSARDataRecord) aHRPHSARDataRecordsHashSetIterator.next());
result.add(aHousehold);
}
return result;
}
/**
* @return All HSARDataRecords
in a HashSet for those HRP in
* aHPHRPHSARDataRecordsHashSet
*/
public HashSet getHSARDataRecordsHashSet(
HashSet aHPHRPHSARDataRecordsHashSet) {
// Get all Householders
HashSet tHSARDataRecords = new HashSet();
HSARDataRecord aHRPHSARDataRecord;
Vector aHousehold;
Iterator tIterator = aHPHRPHSARDataRecordsHashSet.iterator();
while (tIterator.hasNext()) {
aHRPHSARDataRecord = (HSARDataRecord) tIterator.next();
aHousehold = getHSARDataRecordsVector(aHRPHSARDataRecord);
tHSARDataRecords.addAll(aHousehold);
}
return tHSARDataRecords;
}
/**
* @return All HSARDataRecords
for the Households with HRPs as
* in HSARDataRecord[] tHPHRPs
*/
public Vector getHSARDataRecords(HSARDataRecord[] tHPHRPs) {
// Get all Householders
Vector result = new Vector();
Vector aHousehold;
for (int i = 0; i < tHPHRPs.length; i++) {
// log("i " + i + " from " + tHPHRPs.length );
if (tHPHRPs[i] == null) {
boolean debug = true;
log("Null in getHSARDataRecords(HSARDataRecord[])!");
} else {
aHousehold = getHSARDataRecordsVector(tHPHRPs[i]);
result.addAll(aHousehold);
}
}
return result;
}
/**
* @return All HSARDataRecords
for the Households with HRPs as
* in Vector tHPHRPs
*/
public Vector getHSARDataRecords(Vector tHPHRPRecordIDs) {
// Get all Householders
Vector result = new Vector();
Vector aHousehold;
for (int i = 0; i < tHPHRPRecordIDs.size(); i++) {
// log("i " + i + " from " + tHPHRPs.length );
if (tHPHRPRecordIDs.get(i) == null) {
log("Null in getHSARDataRecords(Vector)!");
} else {
aHousehold = getHSARDataRecordsVector(getHSARDataRecord((Long) tHPHRPRecordIDs.get(i)));
result.addAll(aHousehold);
}
}
return result;
}
/**
* @return A pseudo random HSARDataRecord with AgeSex as in aAgeSex.
*/
public HSARDataRecord getHSARDataRecord(
Random aRandom,
AgeSex aAgeSex) {
Object value = this._AgeSexHRP_HSARDataRecordVector_HashMap.get(aAgeSex);
if (value != null) {
Vector aHSARDataRecordVector = (Vector) value;
if (aHSARDataRecordVector.size() > 0) {
return (HSARDataRecord) aHSARDataRecordVector.get(
aRandom.nextInt(aHSARDataRecordVector.size()));
}
}
return null;
}
public HashMap get_AgeSexHRP_HSARDataRecordVector_HashMap() {
return _AgeSexHRP_HSARDataRecordVector_HashMap;
}
public HashMap get_HID_HSARDataRecordVector_HashMap() {
return _HID_HSARDataRecordVector_HashMap;
}
/**
* Method to be used to look up ISARDataRecord from ISARDataRecord._ID.
* @return a HashMap for looking up RecordID from ID
*/
public HashMap get_ID_RecordID_HashMap() {
HashMap tID_RecordID_HashMap = new HashMap();
HSARDataRecord aHSARDataRecord;
for (long RecordID = 0; RecordID < getNDataRecords(); RecordID++) {
aHSARDataRecord = getHSARDataRecord(RecordID);
tID_RecordID_HashMap.put(
aHSARDataRecord.get_ID(),
RecordID);
}
return tID_RecordID_HashMap;
}
@Override
public long getNDataRecords() {
return this._HSARDataRecordArray.length;
}
/**
* A simple class for distinguishing HSARDataRecord with _HRP = true into
* those with the same of _AGEH, _Sex
*/
public class AgeSex
implements Serializable, Comparable {
/**
* For storing _AGE
*/
protected short _Age;
/**
* For storing the _SEX
*/
protected boolean _Sex;
public AgeSex() {
}
public AgeSex(
short _Age,
boolean _Sex) {
this._Age = _Age;
this._Sex = _Sex;
}
public AgeSex(
AgeSex aAgeSex) {
this._Age = aAgeSex._Age;
this._Sex = aAgeSex._Sex;
}
public AgeSex(
HSARDataRecord aHSARDataRecord) {
_Age = aHSARDataRecord._AGEH;
_Sex = aHSARDataRecord._SEX;
}
public short get_Age() {
return _Age;
}
public boolean is_Sex() {
return _Sex;
}
@Override
public boolean equals(Object o) {
return (compareTo(o) == 0);
}
@Override
public int hashCode() {
int hash = 5;
hash = 89 * hash + this._Age;
hash = 89 * hash + (this._Sex ? 1 : 0);
return hash;
}
public int compareTo(Object o) {
if (o != null) {
if (o instanceof AgeSex) {
AgeSex aAgeSex = (AgeSex) o;
if (aAgeSex._Age > _Age) {
return 1;
}
if (aAgeSex._Sex == true && _Sex == false) {
return 1;
}
if (aAgeSex._Sex == false && _Sex == true) {
return -1;
}
return 0;
}
}
return -1;
}
}
}